Skip to content

Commit

Permalink
Making SIA1 discovery roughly work.
Browse files Browse the repository at this point in the history
Also, new method set_services on _ImageDiscoverer to pass in a custom resource
list (for now, for testability)

Also, exposing RegistryResource and RegistryResults in the registry API;
they're really central in using the stuff, and we'll need them in type
annotations a lot.
  • Loading branch information
msdemlei committed Oct 24, 2023
1 parent af5c939 commit 3af9190
Show file tree
Hide file tree
Showing 9 changed files with 141 additions and 31 deletions.
1 change: 1 addition & 0 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,4 @@ Using `pyvo`
io/index
auth/index
utils/prototypes
utils/testing
97 changes: 68 additions & 29 deletions pyvo/discover/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from astropy.coordinates import SkyCoord

from ..dam import obscore
from .. import dal
from .. import registry


Expand Down Expand Up @@ -84,22 +85,23 @@ def from_sia1_recs(cls, sia1_result, filter_func):
for rec in sia1_result:
if not filter_func(rec):
continue

mapped = {
"dataproduct_type": "image" if rec.naxes == 2 else "cube",
"access_url": rec.acref,
"bandpass_hilimit": rec.em_max.to(u.m).value,
"bandpass_lolimit": rec.em_min.to(u.m).value,
"em_max": rec.bandpass_hilimit is not None
and rec.bandpass_hilimit.to(u.m).value,
"em_min": rec.bandpass_lolimit is not None
and rec.bandpass_lolimit.to(u.m).value,
# Sigh. Try to guess exposure time?
"t_min": rec.dateobs,
"t_max": rec.dateobs,
"t_min": rec.dateobs.mjd,
"t_max": rec.dateobs.mjd,
"access_estsize": rec.filesize/1024,
"access_format": rec.format,
"instrument_name": rec.instr,
"s_xsel1": rec.naxis[0],
"s_xsel2": rec.naxis[1],
"s_ra": rec.pos[0],
"s_dec": rec.pos[1],
"s_xel1": rec.naxis[0].to(u.pix).value,
"s_xel2": rec.naxis[1].to(u.pix).value,
"s_ra": rec.pos.icrs.ra.to(u.deg).value,
"s_dec": rec.pos.icrs.dec.to(u.deg).value,
"obs_title": rec.title,
# TODO: do more (s_region!) on the basis of the WCS parts
}
Expand All @@ -120,12 +122,17 @@ class _ImageDiscoverer:
diagnostics. This probably should not be considered API but
rather as an implementation detail of discover_images.
For now, we expose several methods to be called in succession
(see discover_images); that's because we *may* want to make
this API after all and admit user manipulation of our state
in between the larger steps.
The normal usage is to call discover_services(), which will locate
all VO services that may have relevant data. Alternatively, call
set_services(registry_results) with some result of a registry.search()
call. _ImageDiscoverer will then pick capabilities it can use out
of the resource records. Records without usable capabilities are
silently ignored.
Then call query_services to execute the discovery query on these
services.
See discover_images for a discussion of its constructor parameters.
See images_globally for a discussion of its constructor parameters.
"""
# Constraint defaults
# a float in metres
Expand All @@ -137,7 +144,8 @@ class _ImageDiscoverer:
# a radius as a float in degrees
radius = None

def __init__(self, space, spectrum, time, inclusive):
def __init__(self,
space=None, spectrum=None, time=None, inclusive=False):
if space:
self.center = (space[0], space[1])
self.radius = space[2]
Expand All @@ -151,8 +159,24 @@ def __init__(self, space, spectrum, time, inclusive):
self.inclusive = inclusive
self.results: List[obscore.ObsCoreMetadata] = []
self.log: List[str] = []
self.sia1_recs, self.sia2_recs, self.obscore_recs = [], [], []

def _purge_redundant_services(self):
"""removes services querying data already covered by more capable
services from our current services lists.
"""
def ids(recs):
return set(r.ivoid for r in recs)

self.sia1_recs = _clean_for(self.sia1_recs,
ids(self.sia2_recs)|ids(self.obscore_recs))
self.sia2_recs = _clean_for(self.sia2_recs, ids(self.obscore_recs))

# TODO: use further heuristics to cut down on dupes:
# Use relationships. I think we should tell people to use
# IsServiceFor for (say) SIA2 services built on top of TAP services.

def collect_services(self):
def discover_services(self):
"""fills the X_recs attributes with resources declaring coverage
for our constraints.
Expand Down Expand Up @@ -182,19 +206,27 @@ def collect_services(self):
self.obscore_recs = [Queriable(r) for r in registry.search(
registry.Datamodel("obscore"), *constraints)]

# Now remove resources presumably operating on the same underlying
# data collection. First, we deselect by ivoid, where a more powerful
# interface is available
def ids(recs):
return set(r.ivoid for r in recs)
self._purge_redundant_services()

self.sia1_recs = _clean_for(self.sia1_recs,
ids(self.sia2_recs)|ids(self.obscore_recs))
self.sia2_recs = _clean_for(self.sia2_recs, ids(self.obscore_recs))
def set_services(self,
registry_results: registry.RegistryResults) -> None:
"""as an alternative to discover_services, this sets the services
to be queried to the result of a custom registry query.
# TODO: use further heuristics to cut down on dupes:
# Use relationships. I think we should tell people to use
# IsServiceFor for (say) SIA2 services built on top of TAP services.
This will pick the "most capable" interface from each record
and ignore records without image discovery capabilities.
"""
for rsc in registry_results:
if "tap" in rsc.access_modes():
# TODO: we ought to ensure there's an obscore
# table on this; but then: let's rather fix obscore
# discovery
self.obscore_recs.append(Queriable(rsc))
elif "sia2" in rsc.access_modes():
self.sia2_recs.append(Queriable(rsc))
elif "sia" in rsc.access_modes():
self.sia1_recs.append(Queriable(rsc))
# else ignore this record

def _query_one_sia1(self, rec: Queriable):
"""runs our query against a SIA1 capability of rec.
Expand All @@ -216,7 +248,7 @@ def non_spatial_filter(sia1_rec):
# metadata. TODO: require time to be an interval and
# then replace check for dateobs to be within that interval.
if self.time and not self.inclusive and sia1_rec.dateobs:
if not self.time-1<sia1_rec.dateobs<self.time+1:
if not self.time-1<sia1_rec.dateobs.mjd<self.time+1:
return False
return True

Expand All @@ -243,6 +275,7 @@ def _query_sia1(self):
self._query_one_sia1(rec)
except Exception as msg:
self.log.append(f"SIA1 {rec.ivoid} skipped: {msg}")
raise

def _query_one_sia2(self, rec: Queriable):
"""runs our query against a SIA2 capability of rec.
Expand Down Expand Up @@ -313,6 +346,12 @@ def query_services(self):
This fills the results and the log attributes.
"""
if (not self.sia1_recs
and not self.sia2_recs
and not self.obscore_recs):
raise dal.DALQueryError("No services to query. Unless"
" you overrode service selection, you will have to"
" loosen your constraints.")
self._query_sia1()
self._query_sia2()
self._query_obscore()
Expand Down Expand Up @@ -350,7 +389,7 @@ def images_globally(
comparisons with NULL-s false.
"""
discoverer = _ImageDiscoverer(space, spectrum, time, inclusive)
discoverer.collect_services()
discoverer.discover_services()
discoverer.query_services()
# TODO: We should un-dupe by image access URL
# TODO: We could compute SODA cutout URLs here in addition.
Expand Down

Large diffs are not rendered by default.

Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<?xml version='1.0' encoding='utf-8'?>
<VOTABLE version="1.4" xmlns="http://www.ivoa.net/xml/VOTable/v1.3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.ivoa.net/xml/VOTable/v1.3 http://vo.ari.uni-heidelberg.de/docs/schemata/VOTable-1.4.xsd"><RESOURCE type="results"><INFO name="sql_query" value="SELECT ivoid, res_type, short_name, res_title, content_level, res_description, reference_url, creator_seq, content_type, source_format, source_value, region_of_regard, waveband, string_agg(COALESCE ( access_url , '' ), ':::py VO sep:::') AS access_urls, string_agg(COALESCE ( standard_id , '' ), ':::py VO sep:::') AS standard_ids, string_agg(COALESCE ( intf_type , '' ), ':::py VO sep:::') AS intf_types, string_agg(COALESCE ( intf_role , '' ), ':::py VO sep:::') AS intf_roles FROM rr.resource NATURAL LEFT OUTER JOIN rr.capability NATURAL LEFT OUTER JOIN rr.interface WHERE ( ivoid = 'ivo://org.gavo.dc/bgds/q/sia' ) GROUP BY ivoid , res_type , short_name , res_title , content_level , res_description , reference_url , creator_seq , content_type , source_format , source_value , region_of_regard , waveband LIMIT 20000">ADQL query translated to local SQL (for debugging)</INFO><INFO name="query" value="SELECT
ivoid, res_type, short_name, res_title, content_level, res_description, reference_url, creator_seq, content_type, source_format, source_value, region_of_regard, waveband,
ivo_string_agg(COALESCE(access_url, ''), ':::py VO sep:::') AS access_urls,
ivo_string_agg(COALESCE(standard_id, ''), ':::py VO sep:::') AS standard_ids,
ivo_string_agg(COALESCE(intf_type, ''), ':::py VO sep:::') AS intf_types,
ivo_string_agg(COALESCE(intf_role, ''), ':::py VO sep:::') AS intf_roles
FROM
rr.resource
NATURAL LEFT OUTER JOIN rr.capability
NATURAL LEFT OUTER JOIN rr.interface
WHERE
(ivoid = 'ivo://org.gavo.dc/bgds/q/sia')
GROUP BY
ivoid, res_type, short_name, res_title, content_level, res_description, reference_url, creator_seq, content_type, source_format, source_value, region_of_regard, waveband">Original ADQL query</INFO><INFO name="QUERY_STATUS" value="OK">Query successful</INFO><INFO name="server_software" value="DaCHS/2.8.1 twistedWeb/20.3.0">Software that produced this VOTable</INFO><INFO name="server" value="http://dc.zah.uni-heidelberg.de">Base URI of the server</INFO><INFO name="citation" ucd="" value="http://dc.zah.uni-heidelberg.de/__system__/tap/run/howtocite">Advice on citing this resource</INFO><INFO name="citation" ucd="" value="http://dc.zah.uni-heidelberg.de/tableinfo/rr.resource#ti-citing">Advice on citing this resource</INFO><INFO name="citation" ucd="" value="http://dc.zah.uni-heidelberg.de/tableinfo/rr.interface#ti-citing">Advice on citing this resource</INFO><INFO name="citation" ucd="" value="http://dc.zah.uni-heidelberg.de/tableinfo/rr.capability#ti-citing">Advice on citing this resource</INFO><INFO name="ivoid" ucd="meta.ref.ivoid" value="ivo://org.gavo.dc/__system__/tap/run">Originating VO resource</INFO><INFO name="publisher" value="The GAVO DC team">Data centre that has delivered the data</INFO><INFO name="request_date" ucd="time.creation" value="2023-10-24T08:31:27Z"></INFO><INFO name="contact" ucd="meta.email" value="[email protected]">Contact option</INFO><INFO name="reference_url" ucd="meta.ref.url" value="http://dc.zah.uni-heidelberg.de/__system__/tap/run/info">More information on the data Source</INFO><INFO name="reference_url" ucd="meta.ref.url" value="http://dc.zah.uni-heidelberg.de/tableinfo/rr.resource">More information on the data Source</INFO><INFO name="reference_url" ucd="meta.ref.url" value="http://dc.zah.uni-heidelberg.de/tableinfo/rr.interface">More information on the data Source</INFO><INFO name="reference_url" ucd="meta.ref.url" value="http://dc.zah.uni-heidelberg.de/tableinfo/rr.capability">More information on the data Source</INFO><INFO 
name="creator" ucd="meta.bib.author" value="GAVO Data Center">Name of a person or entity that produced a contributing resource</INFO><TABLE name="resource_capability_interface"><GROUP ID="ndhaiaghgmpa" name="note-cl"><DESCRIPTION>
The terms are taken from the vocabulary
http://ivoa.net/rdf/voresource/content_level.</DESCRIPTION><FIELDref ref="content_level"/></GROUP><GROUP ID="ndhaiaghgnoa" name="note-ct"><DESCRIPTION>
The terms are taken from the vocabulary
http://ivoa.net/rdf/voresource/content_type.</DESCRIPTION><FIELDref ref="content_type"/></GROUP><GROUP ID="ndhaiaghgmea" name="note-w"><DESCRIPTION>
The allowed values for waveband include:
Radio, Millimeter, Infrared, Optical, UV, EUV, X-ray, Gamma-ray.</DESCRIPTION><FIELDref ref="waveband"/></GROUP><FIELD ID="ivoid" arraysize="*" datatype="char" name="ivoid" utype="xpath:identifier"><DESCRIPTION>Unambiguous reference to the resource conforming to the IVOA standard for identifiers.</DESCRIPTION></FIELD><FIELD ID="res_type" arraysize="*" datatype="char" name="res_type" utype="xpath:@xsi:type"><DESCRIPTION>Resource type (something like vg:authority, vs:catalogservice, etc).</DESCRIPTION></FIELD><FIELD ID="short_name" arraysize="*" datatype="char" name="short_name" utype="xpath:shortName"><DESCRIPTION>A short name or abbreviation given to something, for presentation in space-constrained fields (up to 16 characters).</DESCRIPTION></FIELD><FIELD ID="res_title" arraysize="*" datatype="unicodeChar" name="res_title" utype="xpath:title"><DESCRIPTION>The full name given to the resource.</DESCRIPTION></FIELD><FIELD ID="content_level" arraysize="*" datatype="char" name="content_level" utype="xpath:content/contentLevel"><DESCRIPTION>A hash-separated list of content levels specifying the intended audience.</DESCRIPTION></FIELD><FIELD ID="res_description" arraysize="*" datatype="unicodeChar" name="res_description" utype="xpath:content/description"><DESCRIPTION>An account of the nature of the resource.</DESCRIPTION></FIELD><FIELD ID="reference_url" arraysize="*" datatype="char" name="reference_url" utype="xpath:content/referenceURL"><DESCRIPTION>URL pointing to a human-readable document describing this resource.</DESCRIPTION></FIELD><FIELD ID="creator_seq" arraysize="*" datatype="unicodeChar" name="creator_seq" utype="xpath:curation/creator/name"><DESCRIPTION>The creator(s) of the resource in the order given by the resource record author, separated by semicolons.</DESCRIPTION></FIELD><FIELD ID="content_type" arraysize="*" datatype="char" name="content_type" utype="xpath:content/type"><DESCRIPTION>A hash-separated list of natures or genres of the content of the 
resource.</DESCRIPTION></FIELD><FIELD ID="source_format" arraysize="*" datatype="char" name="source_format" utype="xpath:content/source/@format"><DESCRIPTION>The format of source_value. This, in particular, can be ``bibcode''.</DESCRIPTION></FIELD><FIELD ID="source_value" arraysize="*" datatype="unicodeChar" name="source_value" utype="xpath:content/source"><DESCRIPTION>A bibliographic reference from which the present resource is derived or extracted.</DESCRIPTION></FIELD><FIELD ID="region_of_regard" datatype="float" name="region_of_regard" unit="deg" utype="xpath:coverage/regionOfRegard"><DESCRIPTION>A single numeric value representing the angle, given in decimal degrees, by which a positional query against this resource should be ``blurred'' in order to get an appropriate match.</DESCRIPTION></FIELD><FIELD ID="waveband" arraysize="*" datatype="char" name="waveband" utype="xpath:coverage/waveband"><DESCRIPTION>A hash-separated list of regions of the electro-magnetic spectrum that the resource's spectral coverage overlaps with.</DESCRIPTION></FIELD><FIELD ID="access_urls" arraysize="*" datatype="char" name="access_urls"/><FIELD ID="standard_ids" arraysize="*" datatype="char" name="standard_ids"/><FIELD ID="intf_types" arraysize="*" datatype="char" name="intf_types"/><FIELD ID="intf_roles" arraysize="*" datatype="char" name="intf_roles"/><DATA><BINARY><STREAM 
encoding="base64">AAAAHGl2bzovL29yZy5nYXZvLmRjL2JnZHMvcS9zaWEAAAARdnM6Y2F0YWxvZ3NlcnZpY2UAAAAIYmdkcyBzaWEAAAApAEIAbwBjAGgAdQBtACAARwBhAGwAYQBjAHQAaQBjACAARABpAHMAawAgAFMAdQByAHYAZQB5ACAAKABCAEcARABTACkAIABpAG0AYQBnAGUAcwAAAAhyZXNlYXJjaAAAAgsAVABoAGUAIABCAG8AYwBoAHUAbQAgAEcAYQBsAGEAYwB0AGkAYwAgAEQAaQBzAGsAIABTAHUAcgB2AGUAeQAgAGkAcwAgAGEAbgAgAG8AbgBnAG8AaQBuAGcAIABwAHIAbwBqAGUAYwB0ACAAdABvACAAbQBvAG4AaQB0AG8AcgAgAHQAaABlAAoAcwB0AGUAbABsAGEAcgAgAGMAbwBuAHQAZQBuAHQAIABvAGYAIAB0AGgAZQAgAEcAYQBsAGEAYwB0AGkAYwAgAGQAaQBzAGsAIABpAG4AIABhACAANgAgAGQAZQBnAHIAZQBlACAAdwBpAGQAZQAgAHMAdAByAGkAcABlAAoAYwBlAG4AdABlAHIAZQBkACAAbwBuACAAdABoAGUAIABHAGEAbABhAGMAdABpAGMAIABwAGwAYQBuAGUALgAgAFQAaABlACAAZABhAHQAYQAgAGgAYQBzACAAYgBlAGUAbgAgAHIAZQBjAG8AcgBkAGUAZAAgAHMAaQBuAGMAZQAKAG0AaQBkAC0AMgAwADEAMAAgAGkAbgAgAFMAbABvAGEAbgAgAHIAIABhAG4AZAAgAGkAIABzAGkAbQB1AGwAdABhAG4AZQBvAHUAcwBsAHkAIAB3AGkAdABoACAAdABoAGUAIABSAG8AQgBvAFQAVAAgAFQAZQBsAGUAYwBzAG8AcABlACAAYQB0AAoAdABoAGUAIABVAG4AaQB2AGUAcgBzAGkAdABhAGUAdABzAHMAdABlAHIAbgB3AGEAcgB0AGUAIABCAG8AYwBoAHUAbQAgAG4AZQBhAHIAIABDAGUAcgByAG8AIABBAHIAbQBhAHoAbwBuAGUAcwAgAGkAbgAgAHQAaABlACAAQwBoAGkAbABlAGEAbgAKAEEAdABhAGMAYQBtAGEAIABkAGUAcwBlAHIAdAAuACAASQB0ACAAYwBvAG4AdABhAGkAbgBzACAAbQBlAGEAcwB1AHIAZQBtAGUAbgB0AHMAIABvAGYAIABhAGIAbwB1AHQAIAAyAHgAMQAwAF4ANwAgAHMAdABhAHIAcwAgAG8AdgBlAHIACgBtAG8AcgBlACAAdABoAGEAbgAgAHMAZQB2AGUAbgAgAHkAZQBhAHIAcwAuACAAQQBkAGQAaQB0AGkAbwBuAGEAbABsAHkALAAgAGkAbgB0AGUAcgBtAGkAdAB0AGUAbgB0ACAAbQBlAGEAcwB1AHIAZQBtAGUAbgB0AHMAIABpAG4ACgBKAG8AaABuAHMAbwBuACAAVQBWAEIAIABhAG4AZAAgAFMAbABvAGEAbgAgAHoAIABoAGEAdgBlACAAYgBlAGUAbgAgAHIAZQBjAG8AcgBkAGUAZAAgAGEAcwAgAHcAZQBsAGwALgAAAC9odHRwOi8vZGMuemFoLnVuaS1oZWlkZWxiZXJnLmRlL2JnZHMvcS9zaWEvaW5mbwAAACwASABhAGMAawBzAHQAZQBpAG4ALAAgAE0ALgA7ACAASABhAGEAcwAsACAATQAuADsAIABGAGUAaQBuACwAIABDAC4AOwAgAEMAaABpAG4AaQAsACAAUgAuAAAABnN1cnZleQAAAAdiaWJjb2RlAAAAEwAyADAAMQA1AEEATgAuAC4ALgAuADMAMwA2AC4ALgA1ADkAMABIf8AAAAAAAAdvcHRpY2FsAAAB6Wh0dHA6Ly9kYy56YWgudW5pLWhlaWRlbGJlcmcuZGUvYmdkcy9xL2RsL2RsbWV0YT
o6OnB5IFZPIHNlcDo6Omh0dHA6Ly9kYy56YWgudW5pLWhlaWRlbGJlcmcuZGUvYmdkcy9xL2RsL2RsZ2V0Ojo6cHkgVk8gc2VwOjo6aHR0cDovL2RjLnphaC51bmktaGVpZGVsYmVyZy5kZS90YXA6OjpweSBWTyBzZXA6OjpodHRwOi8vZGMuemFoLnVuaS1oZWlkZWxiZXJnLmRlL2JnZHMvcS9zaWEvdGFibGVNZXRhZGF0YTo6OnB5IFZPIHNlcDo6Omh0dHA6Ly9kYy56YWgudW5pLWhlaWRlbGJlcmcuZGUvYmdkcy9xL3NpYS9jYXBhYmlsaXRpZXM6OjpweSBWTyBzZXA6OjpodHRwOi8vZGMuemFoLnVuaS1oZWlkZWxiZXJnLmRlL2JnZHMvcS9zaWEvYXZhaWxhYmlsaXR5Ojo6cHkgVk8gc2VwOjo6aHR0cDovL2RjLnphaC51bmktaGVpZGVsYmVyZy5kZS9CR0RTOjo6cHkgVk8gc2VwOjo6aHR0cDovL2RjLnphaC51bmktaGVpZGVsYmVyZy5kZS9iZ2RzL3Evc2lhL3NpYXAueG1sPwAAAURpdm86Ly9pdm9hLm5ldC9zdGQvZGF0YWxpbmsjbGlua3MtMS4xOjo6cHkgVk8gc2VwOjo6aXZvOi8vaXZvYS5uZXQvc3RkL3NvZGEjc3luYy0xLjA6OjpweSBWTyBzZXA6Ojppdm86Ly9pdm9hLm5ldC9zdGQvdGFwI2F1eDo6OnB5IFZPIHNlcDo6Oml2bzovL2l2b2EubmV0L3N0ZC92b3NpI3RhYmxlczo6OnB5IFZPIHNlcDo6Oml2bzovL2l2b2EubmV0L3N0ZC92b3NpI2NhcGFiaWxpdGllczo6OnB5IFZPIHNlcDo6Oml2bzovL2l2b2EubmV0L3N0ZC92b3NpI2F2YWlsYWJpbGl0eTo6OnB5IFZPIHNlcDo6Ojo6OnB5IFZPIHNlcDo6Oml2bzovL2l2b2EubmV0L3N0ZC9zaWEAAADKdnM6cGFyYW1odHRwOjo6cHkgVk8gc2VwOjo6dnM6cGFyYW1odHRwOjo6cHkgVk8gc2VwOjo6dnM6cGFyYW1odHRwOjo6cHkgVk8gc2VwOjo6dnM6cGFyYW1odHRwOjo6cHkgVk8gc2VwOjo6dnM6cGFyYW1odHRwOjo6cHkgVk8gc2VwOjo6dnM6cGFyYW1odHRwOjo6cHkgVk8gc2VwOjo6dnI6d2ViYnJvd3Nlcjo6OnB5IFZPIHNlcDo6OnZzOnBhcmFtaHR0cAAAAH5zdGQ6OjpweSBWTyBzZXA6OjpzdGQ6OjpweSBWTyBzZXA6OjpzdGQ6OjpweSBWTyBzZXA6OjpzdGQ6OjpweSBWTyBzZXA6OjpzdGQ6OjpweSBWTyBzZXA6OjpzdGQ6OjpweSBWTyBzZXA6Ojo6OjpweSBWTyBzZXA6OjpzdGQ=</STREAM></BINARY></DATA></TABLE></RESOURCE></VOTABLE>
Binary file not shown.
Loading

0 comments on commit 3af9190

Please sign in to comment.