-
Notifications
You must be signed in to change notification settings - Fork 0
/
test.aggregate.xml
125 lines (115 loc) · 4.07 KB
/
test.aggregate.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<query xmlns="http://uzh.ch/ddis/katts/query">
<!-- Heartbeat interval: 100 milliseconds between beats. -->
<heartBeat interval="100"/>
<!--
  Ticker input read from a comma-delimited triples file.
  Deployment note: rsync copies the data files to every node before the
  query is deployed.
-->
<fileSource id="tickerSource">
  <files>
    <file path="data/test.aggregate.triples"
          mimeType="text/comma-separated-values"
          csvFieldDelimiter=","/>
  </files>
</fileSource>
<!-- Pre-Filters for the ticker data -->
<!-- Price filter: PRC triples from tickerSource feed the tickerPrice stream. -->
<tripleFilter id="TripleFilter_TickerPRC" applyOnSource="tickerSource">
  <conditions>
    <!-- Predicate PRC: ticker price. -->
    <condition item="predicate" restriction="PRC"/>
  </conditions>
  <produces>
    <stream id="tickerPrice">
      <!-- The triple's object is exposed as ticker_price, its subject as ticker_id. -->
      <variable name="ticker_price" referencesTo="object"/>
      <variable name="ticker_id" referencesTo="subject"/>
    </stream>
  </produces>
</tripleFilter>
<!-- Company-name filter: COMNAM triples from tickerSource feed the tickerCompanyName stream. -->
<tripleFilter id="TripleFilter_TickerCOMNAM" applyOnSource="tickerSource">
  <conditions>
    <!-- Predicate COMNAM: company name. -->
    <condition item="predicate" restriction="COMNAM"/>
  </conditions>
  <produces>
    <stream id="tickerCompanyName">
      <!-- The triple's object is exposed as company_name, its subject as ticker_id. -->
      <variable name="company_name" referencesTo="object"/>
      <variable name="ticker_id" referencesTo="subject"/>
    </stream>
  </produces>
</tripleFilter>
<!-- Symbol filter: TICKER triples from tickerSource feed the tickerSymbol stream. -->
<tripleFilter id="TripleFilter_TICKER" applyOnSource="tickerSource">
  <conditions>
    <!-- Predicate TICKER: ticker symbol. -->
    <condition item="predicate" restriction="TICKER"/>
  </conditions>
  <produces>
    <stream id="tickerSymbol">
      <!-- The triple's object is exposed as ticker_symbol, its subject as ticker_id. -->
      <variable name="ticker_symbol" referencesTo="object"/>
      <variable name="ticker_id" referencesTo="subject"/>
    </stream>
  </produces>
</tripleFilter>
<!-- Department filter: DEPARTMENT triples from tickerSource feed the tickerDepartment stream. -->
<tripleFilter id="TripleFilter_DEPARTMENT" applyOnSource="tickerSource">
  <conditions>
    <!-- Predicate DEPARTMENT: ticker department (this comment previously said "ticker symbol"). -->
    <condition item="predicate" restriction="DEPARTMENT"/>
  </conditions>
  <produces>
    <stream id="tickerDepartment">
      <!-- The triple's object is exposed as ticker_department, its subject as ticker_id. -->
      <variable name="ticker_department" referencesTo="object"/>
      <variable name="ticker_id" referencesTo="subject"/>
    </stream>
  </produces>
</tripleFilter>
<!--
  Joins the symbol, price, company-name and department streams on ticker_id
  and emits the combined tickerStream. ticker_id serves as the join field;
  the produced stream does not list it as a variable.
  NOTE(review): the unit of joinPrecision (20000) is not visible in this
  file; confirm against the engine documentation.
-->
<oneFieldJoin id="Ticker_Join"
              joinOn="ticker_id"
              joinPrecision="20000"
              maxBufferSize="20">
  <consumes>
    <!-- Every consumed stream is grouped on ticker_id, the join field. -->
    <stream streamId="tickerSymbol" maxBufferSize="5">
      <variableGrouping>
        <groupOn variableName="ticker_id"/>
      </variableGrouping>
    </stream>
    <stream streamId="tickerPrice" maxBufferSize="5">
      <variableGrouping>
        <groupOn variableName="ticker_id"/>
      </variableGrouping>
    </stream>
    <stream streamId="tickerCompanyName" maxBufferSize="5">
      <variableGrouping>
        <groupOn variableName="ticker_id"/>
      </variableGrouping>
    </stream>
    <stream streamId="tickerDepartment" maxBufferSize="5">
      <variableGrouping>
        <groupOn variableName="ticker_id"/>
      </variableGrouping>
    </stream>
  </consumes>
  <produces>
    <!-- Each output variable keeps the name it had on its input stream. -->
    <stream id="tickerStream">
      <variable name="ticker_symbol" referencesTo="ticker_symbol"/>
      <variable name="ticker_price" referencesTo="ticker_price"/>
      <variable name="company_name" referencesTo="company_name"/>
      <variable name="ticker_department" referencesTo="ticker_department"/>
    </stream>
  </produces>
</oneFieldJoin>
<!--
  Sums ticker_price over tickerStream, grouped by ticker_symbol and
  ticker_department, and emits the result as summedTickerStream.
  windowSize and every are both P3D (ISO 8601 duration notation: three days).
  The consumed stream is grouped on ticker_symbol alone, whereas groupBy names
  both ticker_symbol and ticker_department.
-->
<aggregate groupBy="ticker_symbol,ticker_department"
           windowSize="P3D"
           every="P3D"
           onlyIfChanged="true"
           parallelism="1">
  <consumes>
    <stream streamId="tickerStream" maxBufferSize="5">
      <variableGrouping>
        <groupOn variableName="ticker_symbol"/>
      </variableGrouping>
    </stream>
  </consumes>
  <aggregators>
    <!-- teh_sum is the alias given to the sum (spelling kept as-is); it is referenced below. -->
    <sum of="ticker_price" as="teh_sum"/>
  </aggregators>
  <produces>
    <stream id="summedTickerStream">
      <variable name="ticker_symbol" referencesTo="ticker_symbol"/>
      <variable name="ticker_department" referencesTo="ticker_department"/>
      <!-- The teh_sum alias is published under the name ticker_sum. -->
      <variable name="ticker_sum" referencesTo="teh_sum"/>
    </stream>
  </produces>
</aggregate>
<!-- Sink: consumes summedTickerStream (shuffle grouping) with output path data/output.csv. -->
<fileOutput id="fileOutput" filePath="data/output.csv">
  <consumes>
    <stream streamId="summedTickerStream" maxBufferSize="5">
      <shuffleGrouping/>
    </stream>
  </consumes>
</fileOutput>
<!-- Termination element; it carries no attributes or children in this query. -->
<termination/>
</query>