Skip to content

Commit

Permalink
Initial attempt at enabling reading the columns from the datasource (#45
Browse files Browse the repository at this point in the history
)

Fixes #42 #46 
* Initial attempt at enabling reading the columns from the datasource
* Fixing pep8 errors for EOFEOL
* Changing to OrderedDict for getting columns
* Add documentation for the various column attributes
* rename column to field
* Fixed #46 encode apostrophes in field names
* Enable multi-lookup for Fields
* Rename properties on the field based on feedback given in #45
  • Loading branch information
Russell Hay authored Jul 1, 2016
1 parent aba2a35 commit 481f38c
Show file tree
Hide file tree
Showing 9 changed files with 337 additions and 4 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setup(
name='tableaudocumentapi',
version='0.1.0-dev',
version='0.1.0.dev0',
author='Tableau Software',
author_email='[email protected]',
url='https://github.com/tableau/document-api-python',
Expand Down
2 changes: 2 additions & 0 deletions tableaudocumentapi/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from .field import Field
from .connection import Connection
from .datasource import Datasource, ConnectionParser
from .workbook import Workbook

__version__ = '0.0.1'
__VERSION__ = __version__
35 changes: 32 additions & 3 deletions tableaudocumentapi/datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,29 @@
# Datasource - A class for writing datasources to Tableau files
#
###############################################################################
import os
import collections
import xml.etree.ElementTree as ET
import xml.sax.saxutils as sax
import zipfile

import xml.etree.ElementTree as ET
from tableaudocumentapi import Connection, xfile
from tableaudocumentapi import Field
from tableaudocumentapi.multilookup_dict import MultiLookupDict


class ConnectionParser(object):
def _mapping_from_xml(root_xml, column_xml):
    """Build the ``(field id, Field)`` pair for one ``<column>`` element.

    The field is created from ``column_xml``; when ``root_xml`` contains a
    ``<metadata-record class='column'>`` whose ``<local-name>`` text equals
    the field id, that record is applied to the field as well.

    :param root_xml: tree (or element) that is searched for metadata records.
    :param column_xml: the ``<column>`` element describing the field.
    :return: tuple of the field id and the ``Field`` object.
    """
    retval = Field.from_xml(column_xml)
    local_name = retval.id

    # Compare against the decoded <local-name> text instead of splicing the
    # name into an XPath predicate: a name containing an apostrophe (e.g.
    # "[Today's Date]") cannot be quoted inside ElementTree's limited XPath,
    # and entity-escaping it would never match the already-decoded text.
    if local_name is not None:
        for metadata_record in root_xml.iterfind(".//metadata-record[@class='column']"):
            if metadata_record.findtext('local-name') == local_name:
                # Only the first matching record is used, as find() would do.
                retval.apply_metadata(metadata_record)
                break

    return retval.id, retval


class ConnectionParser(object):
def __init__(self, datasource_xml, version):
self._dsxml = datasource_xml
self._dsversion = version
Expand Down Expand Up @@ -55,6 +69,7 @@ def __init__(self, dsxml, filename=None):
self._connection_parser = ConnectionParser(
self._datasourceXML, version=self._version)
self._connections = self._connection_parser.get_connections()
self._fields = None

@classmethod
def from_file(cls, filename):
Expand Down Expand Up @@ -115,3 +130,17 @@ def version(self):
@property
def connections(self):
    """The connections held by this datasource, as stored on ``_connections``."""
    return self._connections

###########
# fields
###########
@property
def fields(self):
    """Fields of this datasource, loaded on first access and then cached.

    :return: the object produced by ``_get_all_fields()``.
    """
    # Test against None rather than truthiness: an empty result is falsy and
    # would otherwise be rebuilt on every access.
    if self._fields is None:
        self._fields = self._get_all_fields()
    return self._fields

def _get_all_fields(self):
    """Collect every ``<column>`` of the datasource tree into a MultiLookupDict keyed by field id."""
    fields_by_id = {}
    for column_xml in self._datasourceTree.findall('.//column'):
        field_id, field = _mapping_from_xml(self._datasourceTree, column_xml)
        fields_by_id[field_id] = field
    return MultiLookupDict(fields_by_id)
89 changes: 89 additions & 0 deletions tableaudocumentapi/field.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import functools

# Attributes that Field.__init__ reads for every field.
_ATTRIBUTES = [
    'id',  # Name of the field as specified in the file, usually surrounded by [ ]
    'caption',  # Name of the field as displayed in Tableau unless an alias is defined
    'datatype',  # Type of the field within Tableau (string, integer, etc)
    'role',  # Dimension or Measure
    'type',  # Three possible values: quantitative, ordinal, or nominal
    'alias',  # Name of the field as displayed in Tableau if the default name isn't wanted
    'calculation',  # If this field is a calculated field, this will be the formula
]

# Attributes that start out as None and are filled in by Field.apply_metadata.
_METADATA_ATTRIBUTES = [
    'aggregation',  # The type of aggregation on the field (e.g. Sum, Avg)
]


def _find_metadata_record(record, attrib):
    """Return the text of the first ``attrib`` descendant of ``record``, or None if there is none."""
    match = record.find('.//{}'.format(attrib))
    return None if match is None else match.text


class Field(object):
    """A field of a datasource, read from a ``<column>`` XML element.

    Every name in ``_ATTRIBUTES`` and ``_METADATA_ATTRIBUTES`` is stored on
    the instance with a leading underscore and served without it through
    ``__getattr__``.
    """

    def __init__(self, xmldata):
        def read_xml_attribute(attribute_name):
            # Fallback used for attributes that have no ``_read_*`` method.
            return xmldata.attrib.get(attribute_name)

        for attribute_name in _ATTRIBUTES:
            self._apply_attribute(xmldata, attribute_name, read_xml_attribute)

        # Metadata values stay None until apply_metadata() is called.
        for attribute_name in _METADATA_ATTRIBUTES:
            setattr(self, '_' + attribute_name, None)

    def apply_metadata(self, metadata_record):
        """Fill the metadata attributes from the given metadata record element."""
        def read_from_record(attribute_name):
            return _find_metadata_record(metadata_record, attribute_name)

        for attribute_name in _METADATA_ATTRIBUTES:
            self._apply_attribute(metadata_record, attribute_name, read_from_record)

    @classmethod
    def from_xml(cls, xmldata):
        """Alternate constructor: build a field from its XML element."""
        return cls(xmldata)

    def __getattr__(self, item):
        # Only reached when normal lookup fails; serve the known attribute
        # names from their underscore-prefixed storage.
        if item not in _ATTRIBUTES and item not in _METADATA_ATTRIBUTES:
            raise AttributeError(item)
        return getattr(self, '_{}'.format(item))

    def _apply_attribute(self, xmldata, attrib, default_func):
        """Store ``attrib`` using its ``_read_<attrib>`` reader if one exists, else ``default_func``."""
        reader = getattr(self, '_read_{}'.format(attrib), None)
        if reader is not None:
            value = reader(xmldata)
        else:
            value = default_func(attrib)

        setattr(self, '_{}'.format(attrib), value)

    @property
    def name(self):
        """ A display name for the field: the alias when one is set, otherwise
        the caption, otherwise the id (the underlying name). """
        return getattr(self, 'alias', None) or getattr(self, 'caption', None) or self.id

    ######################################
    # Readers for attributes that are not stored as a same-named XML attribute
    ######################################
    @staticmethod
    def _read_id(xmldata):
        # The XML calls it ``name``; it is exposed as ``id`` so that ``name``
        # can be the friendly display name.
        return xmldata.attrib.get('name')

    @staticmethod
    def _read_calculation(xmldata):
        # The formula sits on a nested <calculation> element rather than on
        # the column itself.
        calc = xmldata.find('.//calculation')
        return None if calc is None else calc.attrib.get('formula')
49 changes: 49 additions & 0 deletions tableaudocumentapi/multilookup_dict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
def _resolve_value(key, value):
    """Look up ``key`` on ``value``, which may be a mapping or a plain object.

    Values that offer a callable ``get`` are queried through it; any other
    value is queried with attribute access.

    :param key: name of the entry or attribute to read (e.g. ``'alias'``).
    :param value: mapping-like or attribute-bearing object to read from.
    :return: the resolved value, or None when it cannot be found.
    """
    getter = getattr(value, 'get', None)
    if callable(getter):
        try:
            return getter(key, None)
        except AttributeError:
            # Stay best-effort: a failing lookup means "no value".
            return None

    # Objects have no ``.getattr`` method; the builtin is what reads an
    # attribute by name, and its default covers missing attributes.
    return getattr(value, key, None)


def _build_index(key, d):
    """Map each item's resolved ``key`` value to the item's key in ``d``, skipping unresolved items."""
    index = {}
    for item_key, item in d.items():
        lookup = _resolve_value(key, item)
        if lookup is not None:
            index[lookup] = item_key
    return index


# TODO: Improve this to be more generic
class MultiLookupDict(dict):
    """A dict whose items can also be fetched by their alias or caption.

    ``d[key]`` first treats ``key`` as an alias, then as a caption, and only
    then as a regular dictionary key.
    """

    def __init__(self, args=None):
        super(MultiLookupDict, self).__init__({} if args is None else args)
        self._indexes = {'alias': {}, 'caption': {}}
        self._populate_indexes()

    def _populate_indexes(self):
        """Rebuild both secondary indexes from the current contents."""
        for index_name in ('alias', 'caption'):
            self._indexes[index_name] = _build_index(index_name, self)

    def __setitem__(self, key, value):
        # Resolve both lookups before touching any index.
        resolved = [(index_name, _resolve_value(index_name, value))
                    for index_name in ('alias', 'caption')]
        for index_name, lookup in resolved:
            if lookup is not None:
                self._indexes[index_name][lookup] = key

        dict.__setitem__(self, key, value)

    def __getitem__(self, key):
        # Alias takes precedence over caption; the first hit is translated
        # to the real key.
        for index_name in ('alias', 'caption'):
            index = self._indexes[index_name]
            if key in index:
                key = index[key]
                break

        return dict.__getitem__(self, key)
2 changes: 2 additions & 0 deletions test/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from . import bvt
from . import test_datasource
86 changes: 86 additions & 0 deletions test/assets/datasource_test.tds
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
<?xml version='1.0' encoding='utf-8' ?>
<datasource formatted-name='postgres.1of3kl00aoax5d1a1ejma1397430' inline='true' source-platform='mac' version='9.3' xmlns:user='http://www.tableausoftware.com/xml/user'>
<connection authentication='username-password' class='postgres' dbname='TestV1' odbc-native-protocol='yes' port='5432' server='postgres91.test.tsi.lan' username='test'>
<relation name='xy' table='[public].[xy]' type='table' />
<metadata-records>
<metadata-record class='column'>
<remote-name>a</remote-name>
<remote-type>130</remote-type>
<local-name>[a]</local-name>
<parent-name>[xy]</parent-name>
<remote-alias>a</remote-alias>
<ordinal>1</ordinal>
<local-type>string</local-type>
<aggregation>Count</aggregation>
<width>255</width>
<contains-null>true</contains-null>
<attributes>
<attribute datatype='string' name='DebugRemoteType'>&quot;SQL_WVARCHAR&quot;</attribute>
<attribute datatype='string' name='DebugWireType'>&quot;SQL_C_WCHAR&quot;</attribute>
<attribute datatype='string' name='TypeIsVarchar'>&quot;true&quot;</attribute>
</attributes>
</metadata-record>
<metadata-record class='column'>
<remote-name>Today's Date</remote-name>
<remote-type>130</remote-type>
<local-name>[Today&apos;s Date]</local-name>
<parent-name>[xy]</parent-name>
<remote-alias>a</remote-alias>
<ordinal>1</ordinal>
<local-type>string</local-type>
<aggregation>Count</aggregation>
<width>255</width>
<contains-null>true</contains-null>
<attributes>
<attribute datatype='string' name='DebugRemoteType'>&quot;SQL_WVARCHAR&quot;</attribute>
<attribute datatype='string' name='DebugWireType'>&quot;SQL_C_WCHAR&quot;</attribute>
<attribute datatype='string' name='TypeIsVarchar'>&quot;true&quot;</attribute>
</attributes>
</metadata-record>
<metadata-record class='column'>
<remote-name>x</remote-name>
<remote-type>3</remote-type>
<local-name>[x]</local-name>
<parent-name>[xy]</parent-name>
<remote-alias>x</remote-alias>
<ordinal>2</ordinal>
<local-type>integer</local-type>
<aggregation>Sum</aggregation>
<precision>10</precision>
<contains-null>true</contains-null>
<attributes>
<attribute datatype='string' name='DebugRemoteType'>&quot;SQL_INTEGER&quot;</attribute>
<attribute datatype='string' name='DebugWireType'>&quot;SQL_C_SLONG&quot;</attribute>
</attributes>
</metadata-record>
<metadata-record class='column'>
<remote-name>y</remote-name>
<remote-type>3</remote-type>
<local-name>[y]</local-name>
<parent-name>[xy]</parent-name>
<remote-alias>y</remote-alias>
<ordinal>3</ordinal>
<local-type>integer</local-type>
<aggregation>Sum</aggregation>
<precision>10</precision>
<contains-null>true</contains-null>
<attributes>
<attribute datatype='string' name='DebugRemoteType'>&quot;SQL_INTEGER&quot;</attribute>
<attribute datatype='string' name='DebugWireType'>&quot;SQL_C_SLONG&quot;</attribute>
</attributes>
</metadata-record>
</metadata-records>
</connection>
<aliases enabled='yes' />
<column datatype='integer' name='[Number of Records]' role='measure' type='quantitative' user:auto-column='numrec'>
<calculation class='tableau' formula='1' />
</column>
<column caption='A' datatype='string' name='[a]' role='dimension' type='nominal' />
<column caption='Today&apos;s Date' datatype='string' name='[Today&apos;s Date]' role='dimension' type='nominal' />
<column caption='X' datatype='integer' name='[x]' role='measure' type='quantitative' />
<column caption='Y' datatype='integer' name='[y]' role='measure' type='quantitative' />
<layout dim-ordering='alphabetic' dim-percentage='0.5' measure-ordering='alphabetic' measure-percentage='0.5' show-structure='true' />
<semantic-values>
<semantic-value key='[Country].[Name]' value='&quot;United States&quot;' />
</semantic-values>
</datasource>
29 changes: 29 additions & 0 deletions test/test_datasource.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import unittest
import os.path
import functools

from tableaudocumentapi import Datasource

# Sample datasource stored in the ``assets`` folder beside this module.
TEST_TDS_FILE = os.path.join(os.path.dirname(__file__), 'assets', 'datasource_test.tds')


class DataSourceFields(unittest.TestCase):
    """Checks the fields a Datasource exposes for the sample .tds asset."""

    def setUp(self):
        self.ds = Datasource.from_file(TEST_TDS_FILE)

    def test_datasource_returns_correct_fields(self):
        self.assertIsNotNone(self.ds.fields)
        number_of_records = self.ds.fields.get('[Number of Records]')
        self.assertIsNotNone(number_of_records)

    def test_datasource_returns_calculation_from_fields(self):
        number_of_records = self.ds.fields['[Number of Records]']
        self.assertEqual('1', number_of_records.calculation)

    def test_datasource_uses_metadata_record(self):
        x_field = self.ds.fields['[x]']
        self.assertEqual('Sum', x_field.aggregation)

    def test_datasource_column_name_contains_apostrophy(self):
        todays_date = self.ds.fields.get("[Today's Date]")
        self.assertIsNotNone(todays_date)
47 changes: 47 additions & 0 deletions test/test_multidict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
import unittest
import os.path
import functools

from tableaudocumentapi.multilookup_dict import MultiLookupDict


class MLDTests(unittest.TestCase):
    """Lookup behaviour of MultiLookupDict by id, alias and caption."""

    def setUp(self):
        # [foo] has alias and caption, [bar] only a caption, [baz] neither.
        entries = {}
        entries['[foo]'] = {'alias': 'bar', 'caption': 'baz', 'value': 1}
        entries['[bar]'] = {'caption': 'foo', 'value': 2}
        entries['[baz]'] = {'value': 3}
        self.mld = MultiLookupDict(entries)

    def test_multilookupdict_name_only(self):
        entry = self.mld['[baz]']
        self.assertEqual(3, entry['value'])

    def test_multilookupdict_alias_overrides_everything(self):
        entry = self.mld['bar']
        self.assertEqual(1, entry['value'])

    def test_mutlilookupdict_caption_overrides_id(self):
        entry = self.mld['foo']
        self.assertEqual(2, entry['value'])

    def test_mutlilookupdict_can_still_find_id_even_with_alias(self):
        entry = self.mld['[foo]']
        self.assertEqual(1, entry['value'])

    def test_mutlilookupdict_can_still_find_caption_even_with_alias(self):
        entry = self.mld['baz']
        self.assertEqual(1, entry['value'])

    def test_mutlilookupdict_can_still_find_id_even_with_caption(self):
        entry = self.mld['[bar]']
        self.assertEqual(2, entry['value'])

0 comments on commit 481f38c

Please sign in to comment.