Skip to content

Commit

Permalink
Expose full search functionality of CKAN and fix warnings
Browse files Browse the repository at this point in the history
  • Loading branch information
mcarans committed Oct 24, 2016
1 parent c32705a commit cffb7d7
Show file tree
Hide file tree
Showing 13 changed files with 261 additions and 218 deletions.
377 changes: 199 additions & 178 deletions .idea/workspace.xml

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -219,9 +219,11 @@ You can read an existing HDX object with the static `read_from_hdx` method whi

You can search for datasets and resources in HDX using the `search_in_hdx` method which takes a configuration and a query parameter and returns a list of objects of the appropriate HDX object type eg. `list[Dataset]` eg.

datasets = Dataset.search_in_hdx(configuration, 'QUERY')
datasets = Dataset.search_in_hdx(configuration, 'QUERY', **kwargs)

The query parameter takes a different format depending upon whether it is for a [dataset](http://lucene.apache.org/core/3_6_0/queryparsersyntax.html) or a [resource](http://docs.ckan.org/en/ckan-2.3.4/api/index.html#ckan.logic.action.get.resource_search).
The query parameter takes a different format depending upon whether it is for a [dataset](http://lucene.apache.org/core/3_6_0/queryparsersyntax.html) or a [resource](http://docs.ckan.org/en/ckan-2.3.4/api/index.html#ckan.logic.action.get.resource_search).

Various additional arguments (`**kwargs`) can be supplied. These are detailed in the API documentation.

You can create an HDX Object, such as a dataset, resource or gallery item by calling the constructor with a configuration, which is required, and an optional dictionary containing metadata. For example:

Expand Down
27 changes: 18 additions & 9 deletions hdx/data/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ def init_resources(self) -> None:
None
"""
self.resources = list()
""":type : List[Resource]"""

def add_update_resource(self, resource: Any) -> None:
"""Add new or update existing resource in dataset with new metadata
Expand All @@ -99,14 +100,13 @@ def add_update_resource(self, resource: Any) -> None:
Returns:
None
"""
if isinstance(resource, dict):
resource = Resource(self.configuration, resource)
if isinstance(resource, Resource):
if 'package_id' in resource:
raise HDXError("Resource %s being added already has a dataset id!" % (resource['name']))
self._addupdate_hdxobject(self.resources, 'name', self._underlying_object, resource)
return
if isinstance(resource, dict):
self._addupdate_hdxobject(self.resources, 'name', Resource, resource)
return
raise HDXError("Type %s cannot be added as a resource!" % type(resource).__name__)

def add_update_resources(self, resources: List[Any]) -> None:
Expand Down Expand Up @@ -164,14 +164,13 @@ def add_update_galleryitem(self, galleryitem) -> None:
None
"""
if isinstance(galleryitem, dict):
galleryitem = GalleryItem(self.configuration, galleryitem)
if isinstance(galleryitem, GalleryItem):
if 'dataset_id' in galleryitem:
raise HDXError("Gallery item %s being added already has a dataset id!" % (galleryitem['name']))
self._addupdate_hdxobject(self.gallery, 'title', self._underlying_object, galleryitem)
return
if isinstance(galleryitem, dict):
self._addupdate_hdxobject(self.gallery, 'title', GalleryItem, galleryitem)
return
raise HDXError("Type %s cannot be added as a gallery item!" % type(galleryitem).__name__)

def add_update_gallery(self, gallery: List[Any]):
Expand Down Expand Up @@ -430,20 +429,30 @@ def delete_from_hdx(self) -> None:
self._delete_from_hdx('dataset', 'id')

@staticmethod
def search_in_hdx(configuration: Configuration, query: str) -> List['Dataset']:
def search_in_hdx(configuration: Configuration, query: str, **kwargs) -> List['Dataset']:
"""Searches for datasets in HDX
Args:
configuration (Configuration): HDX Configuration
query (str): Query
query (str): Query (in Solr format). Defaults to '*:*'.
**kwargs: See below
fq (string): Any filter queries to apply
sort (string): Sorting of the search results. Defaults to 'relevance asc, metadata_modified desc'.
rows (int): Number of matching rows to return
start (int): Offset in the complete result for where the set of returned datasets should begin
            facet (string): Whether to enable faceted results. Defaults to True.
facet.mincount (int): Minimum counts for facet fields should be included in the results
            facet.limit (int): Maximum number of values the facet fields return (negative = unlimited). Defaults to 50.
facet.field (List[str]): Fields to facet upon. Default is empty.
use_default_schema (bool): Use default package schema instead of custom schema. Defaults to False.
Returns:
List[Dataset]: List of datasets resulting from query
"""

datasets = []
dataset = Dataset(configuration)
success, result = dataset._read_from_hdx('dataset', query, 'q')
success, result = dataset._read_from_hdx('dataset', query, 'q', **kwargs)
if result:
count = result.get('count', None)
if count:
Expand Down
9 changes: 5 additions & 4 deletions hdx/data/hdxobject.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,15 +84,15 @@ def update_from_json(self, path: str):

def _read_from_hdx(self, object_type: str, value: str, fieldname: Optional[str] = 'id',
action: Optional[str] = None,
other_fields: dict = {}) -> Union[Tuple[bool, dict], Tuple[bool, str]]:
**kwargs) -> Union[Tuple[bool, dict], Tuple[bool, str]]:
"""Makes a read call to HDX passing in given parameter.
Args:
object_type (str): Description of HDX object type (for messages)
value (str): Value of HDX field
fieldname (Optional[str]): HDX field name. Defaults to id.
action (Optional[str]): Replacement CKAN action url to use. Defaults to None.
other_fields (dict): Other fields to pass to CKAN. Defaults to empty dict.
**kwargs: Other fields to pass to CKAN.
Returns:
(bool, dict/str): (True/False, HDX object metadata/Error)
Expand All @@ -105,7 +105,7 @@ def _read_from_hdx(self, object_type: str, value: str, fieldname: Optional[str]
else:
action = self.actions()['show']
data = {fieldname: value}
data.update(other_fields)
data.update(kwargs)
try:
result = self.hdxpostsite.call_action(action, data,
requests_kwargs={'auth': self.configuration._get_credentials()})
Expand Down Expand Up @@ -394,7 +394,8 @@ def _separate_hdxobjects(self, hdxobjects: List[HDXObjectUpperBound], hdxobjects
Returns:
None
"""
new_hdxobjects = self.data.get(hdxobjects_name, None)
new_hdxobjects = self.data.get(hdxobjects_name, list())
""":type : List[HDXObjectUpperBound]"""
if new_hdxobjects:
hdxobject_names = set()
for hdxobject in hdxobjects:
Expand Down
15 changes: 10 additions & 5 deletions hdx/data/resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,13 +121,16 @@ def delete_from_hdx(self) -> None:
self._delete_from_hdx('resource', 'id')

@staticmethod
def search_in_hdx(configuration: Configuration, query: str) -> List['Resource']:
def search_in_hdx(configuration: Configuration, query: str, **kwargs) -> List['Resource']:
"""Searches for resources in HDX
Args:
configuration (Configuration): HDX Configuration
query (str): Query
**kwargs: See below
order_by (str): A field on the Resource model that orders the results
offset (int): Apply an offset to the query
limit (int): Apply a limit to the query
Returns:
List[Resource]: List of resources resulting from query
"""
Expand All @@ -153,7 +156,7 @@ def delete_datastore(self) -> None:
"""
success, result = self._read_from_hdx('datastore', self.data['id'], 'resource_id',
self.actions()['datastore_delete'],
{'force': True})
force=True)
if not success:
logger.debug(result)

Expand All @@ -176,6 +179,7 @@ def create_datastore(self, schema: List[dict], primary_key: Optional[str] = None

data = {'resource_id': self.data['id'], 'force': True, 'fields': schema, 'primary_key': primary_key}
self._write_to_hdx('datastore_create', data, 'id')
f = None
try:
f = open(path, 'r')
reader = csv.DictReader(f)
Expand All @@ -192,8 +196,9 @@ def create_datastore(self, schema: List[dict], primary_key: Optional[str] = None
except Exception as e:
raise HDXError('Upload to datastore of %s failed!' % url) from e
finally:
f.close()
os.unlink(path)
if f:
f.close()
os.unlink(path)

def create_datastore_from_dict_schema(self, data: dict) -> None:
"""Creates a resource in the HDX datastore from a YAML file containing a list of fields and types of
Expand Down
12 changes: 7 additions & 5 deletions hdx/utilities/dictionary.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,22 @@
"""Dict utilities"""
from collections import UserDict

from typing import List, Optional
from typing import List, Optional, TypeVar

DictUpperBound = TypeVar('T', bound='dict')

def merge_two_dictionaries(a: dict, b: dict) -> dict:

def merge_two_dictionaries(a: DictUpperBound, b: DictUpperBound) -> DictUpperBound:
"""Merges b into a and returns merged result
NOTE: tuples and arbitrary objects are not handled as it is totally ambiguous what should happen
Args:
a (dict): dictionary to merge into
b: (dict): dictionary to merge from
a (DictUpperBound): dictionary to merge into
        b (DictUpperBound): dictionary to merge from
Returns:
dict: Merged dictionary
DictUpperBound: Merged dictionary
"""
key = None
# ## debug output
Expand Down
4 changes: 3 additions & 1 deletion hdx/utilities/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def download_file(url: str, path: Optional[str] = None) -> str:
raise DownloadError('Download of %s failed in setup of stream!' % url) from e
if r.status_code != 200:
raise DownloadError('Download of %s failed in setup of stream!' % url)
f = None
try:
if path:
f = open(path, 'wb')
Expand All @@ -42,7 +43,8 @@ def download_file(url: str, path: Optional[str] = None) -> str:
except Exception as e:
raise DownloadError('Download of %s failed in retrieval of stream!' % url) from e
finally:
f.close()
if f:
f.close()


def get_headers(url: str, timeout: Optional[float] = None) -> dict:
Expand Down
2 changes: 1 addition & 1 deletion hdx/utilities/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
import json
import os
import sys
from typing import List, Any, Optional

import yaml
from typing import List, Any, Optional

from .dictionary import merge_two_dictionaries, merge_dictionaries

Expand Down
12 changes: 6 additions & 6 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
ckanapi==3.6
colorlog==2.6.3
ndg-httpsclient==0.4.0
colorlog==2.7.0
ndg-httpsclient==0.4.2
pyasn1==0.1.9
pyOpenSSL==16.0.0
pyaml==15.8.2
requests==2.9.1
pyOpenSSL==16.2.0
pyaml == 16.9.0
requests==2.11.1
scraperwiki==0.5.1
typing==3.5.1
typing==3.5.2.2
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

setup(
name='hdx-python-api',
version='0.5',
version='0.51',
packages=find_packages(exclude=['ez_setup', 'tests', 'tests.*']),
url='http://data.humdata.org/',
license='PSF',
Expand Down
6 changes: 3 additions & 3 deletions test-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
pytest==2.9.2
pytest-cov==2.2.1
pytest-pythonpath==0.7
pytest==3.0.3
pytest-cov==2.4.0
pytest-pythonpath==0.7.1
logging_tree==1.7
-r requirements.txt
4 changes: 2 additions & 2 deletions tests/hdx/data/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,11 +170,11 @@ class TestDataset():
}

resources_data = [{"id": "de6549d8-268b-4dfe-adaf-a4ae5c8510d5", "description": "Resource1",
"package_id": "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d", "name": "Resource1",
"name": "Resource1",
"url": "http://resource1.xlsx",
"format": "xlsx"},
{"id": "DEF", "description": "Resource2",
"package_id": "6f36a41c-f126-4b18-aaaf-6c2ddfbc5d4d", "name": "Resource2",
"name": "Resource2",
"url": "http://resource2.csv",
"format": "csv"}]

Expand Down
3 changes: 2 additions & 1 deletion workingexample/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,14 @@
'''
import logging

from hdx.configuration import Configuration
from hdx.facades.scraperwiki import facade
from .my_code import generate_dataset

logger = logging.getLogger(__name__)


def main(configuration: dict):
def main(configuration: Configuration):
'''Generate dataset and create it in HDX'''

dataset = generate_dataset(configuration)
Expand Down

0 comments on commit cffb7d7

Please sign in to comment.