forked from ArneBinder/itsrdf
-
Notifications
You must be signed in to change notification settings - Fork 0
/
its-rdf.rdf
694 lines (640 loc) · 57.4 KB
/
its-rdf.rdf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
<?xml version="1.0"?>
<rdf:RDF xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:nif="http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#"
xmlns:itsrdf="http://www.w3.org/2005/11/its/rdf#"
xmlns:owl ="http://www.w3.org/2002/07/owl#"
xmlns:dcterms="http://purl.org/dc/terms/"
xmlns:dc="http://purl.org/dc/elements/1.1/">
<!-- Ontology header: creators, abstract, licence, preferred prefix/namespace (VANN), version and changelog for the ITS 2.0 / RDF ontology. -->
<owl:Ontology rdf:about="http://www.w3.org/2005/11/its/rdf#"
xmlns:vann="http://purl.org/vocab/vann/">
<dc:creator>Dave Lewis</dc:creator>
<dc:creator>Felix Sasaki</dc:creator>
<dc:creator>Sebastian Hellmann</dc:creator>
<!-- No rdf:datatype here: RDF/XML ignores xml:lang on typed literals, and every other rdfs:comment in this file is a language-tagged literal. -->
<rdfs:comment xml:lang="en"> This is the ITS 2.0 / RDF
Ontology version 8 October 2015.
The ontology is used to express ITS 2.0 so-called "data categories" as linked data. For more information about ITS 2.0, see http://www.w3.org/TR/its20/
ITS 2.0 has been produced by the W3C MultilingualWeb-LT working group, see http://www.w3.org/International/multilingualweb/lt/
This ontology is being maintained by the W3C ITS Interest Group, see http://www.w3.org/International/its/ig/
Please note that this file may change without notice. For updated information, please refer to the latest version of Internationalization Tag Set (ITS) Version 2.0 specification at http://www.w3.org/TR/its20/ and the ITS IG page at http://www.w3.org/International/its/ig/
The ITS 2.0 / RDF Ontology is an RDF/OWL-based format that represents information to foster automated creation and processing of multilingual Web content. The ontology provides definitions of RDF properties that exactly align with the so-called "data categories" provided in the ITS 2.0 specification. The difference is the usage scenario. The ITS 2.0 specification defines usages of ITS in markup, the ontology defines usage in linked data.
Like with ITS 2.0 itself, the main purpose of this ontology is to express information in source content in one or several languages that then can be taken up in multilingual workflows. Examples of such information are the "Translate" data category and the "translate" RDF property. They trigger in translation workflows whether a human translator or a machine translation system should change a particular piece of content.
Because of the role of ITS, its actual usage often involves both source content and workflow formats. An XML format for multilingual workflows is XLIFF (XML Localization Interchange File Format). As of writing, native ITS support is prepared for the upcoming XLIFF version 2.1. A prominent usage scenario of the ITS ontology in the realm of linked data workflows involving natural language is NIF (Natural Language Processing Interchange Format). Another potential usage scenario is the W3C Annotation model, see http://www.w3.org/TR/annotation-model/ , or in localization workflows as discussed in the BPMLOD community group http://www.w3.org/community/bpmlod/wiki/Use_cases_definition#Localization_Workflow
Linked data examples that show the usage of this ontology in NIF can be found via the FREME API http://api.freme-project.eu/doc/current/ . A prototypical example file in XML and its counterpart involving NIF can be found here:
https://github.com/w3c/itsrdf/blob/master/its-in-markup.xml
https://github.com/w3c/itsrdf/blob/master/its-in-linked-data.json
The ontology contains only properties that express the same information as the related ITS 2.0 data categories. Each property definition has a link to the underlying data category section in the ITS 2.0 specification.</rdfs:comment>
<dc:publisher>World Wide Web Consortium (W3C)</dc:publisher>
<dc:rights xml:lang="en">This ontology is licensed under the W3C software license (http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231)</dc:rights>
<dcterms:license rdf:resource="http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231"/>
<dcterms:title xml:lang="en">ITS 2.0 / RDF Ontology</dcterms:title>
<!-- VANN metadata: one conventional prefix bound once on owl:Ontology instead of two generated prefixes for the same namespace. -->
<vann:preferredNamespacePrefix>itsrdf</vann:preferredNamespacePrefix>
<vann:preferredNamespaceUri>http://www.w3.org/2005/11/its/rdf#</vann:preferredNamespaceUri>
<rdfs:isDefinedBy rdf:resource="https://github.com/w3c/itsrdf/blob/master/its-rdf.rdf"/>
<rdfs:label xml:lang="en">ITS 2.0 / RDF Ontology</rdfs:label>
<owl:versionInfo>Versioning done on resource level. See https://github.com/w3c/itsrdf/blob/master/its-rdf.rdf</owl:versionInfo>
<!-- Keep in sync with the newest entry of the changelog below. -->
<owl:versionInfo>0.1.2</owl:versionInfo>
<rdfs:comment xml:lang="en">
Changelog:
* 0.1.1 update of abstract and editing of properties "translate" and "locNoteType"
* 0.1.2 further editing of ontology description by Arne Binder
</rdfs:comment>
</owl:Ontology>
<!-- itsrdf:translate: datatype property with range xsd:string; maps the ITS "Translate" data category (values "yes" / "no"). -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#translate">
<rdfs:comment xml:lang="en">The translate property is based on the ITS "Translate" data category. The property expresses information about whether the text associated with a resource should be translated or not. Possible values are "yes" (translatable) or "no" (not translatable). Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#translatability-definition</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string" />
</owl:DatatypeProperty>
<!-- itsrdf:locNoteType: datatype property with range xsd:string; type of a localization note ("description" or "alert"). -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#locNoteType">
<rdfs:comment xml:lang="en">The locNoteType property is based on the ITS "Localization Note" data category. The property expresses the type of a localization note.
Possible values are "description" or "alert":
• An alert contains information that the translator has to read before translating a piece of text. Example: an instruction to the translator to leave parts of the text in the source language.
• A description provides useful background information that the translator will refer to only if they wish. Example: a clarification of ambiguity in the source text.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#locNote-datacat</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string" />
</owl:DatatypeProperty>
<!-- itsrdf:locNote: datatype property carrying the text of a localization note (ITS "Localization Note" data category). No rdfs:range is declared for it in this file. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#locNote">
<rdfs:comment xml:lang="en">The locNote property is based on the ITS "Localization Note" data category. The property is used to communicate notes to localizers about a particular item of content.
This property can be used for several purposes, including, but not limited to:
• Tell the translator how to translate parts of the content
• Expand on the meaning or contextual usage of a specific resource, such as what a variable refers to or how a string will be used in the user interface
• Clarify ambiguity and show relationships between resources sufficiently to allow correct translation (e.g., in many languages it is impossible to translate the word "enabled" in isolation without knowing the gender, number, and case of the thing it refers to.)
• Indicate why a piece of text is emphasized (important, sarcastic, etc.)
Editing tools may offer an easy way to create this type of information. Translation tools can be made to recognize the difference between these two types of localization notes, and present the information to translators in different ways.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#locNote-datacat</rdfs:comment>
</owl:DatatypeProperty>
<!-- itsrdf:locNoteRef: object property pointing at a resource that holds the localization note (see itsrdf:locNote). -->
<owl:ObjectProperty rdf:about="http://www.w3.org/2005/11/its/rdf#locNoteRef">
<rdfs:comment xml:lang="en">
The locNoteRef property is based on the ITS "Localization Note" data category. This property refers to a resource holding the localization note, see DatatypeProperty locNote for further information.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#locNote-datacat
</rdfs:comment>
</owl:ObjectProperty>
<!-- itsrdf:term: datatype property with range xsd:string; marks a term (ITS "Terminology" data category, values "yes" / "no"). -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#term">
<rdfs:comment xml:lang="en">
The term property is based on the ITS "Terminology" data category. This property is used to mark terms and optionally associate them with information, such as definitions. This helps to increase consistency across different parts of the documentation. It is also helpful for translation. Possible values are "yes" or "no".
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#terminology
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string" />
</owl:DatatypeProperty>
<!-- itsrdf:termInfoRef: object property pointing at a resource with information about the term (ITS "Terminology" data category). -->
<owl:ObjectProperty rdf:about="http://www.w3.org/2005/11/its/rdf#termInfoRef">
<rdfs:comment xml:lang="en">
The termInfoRef property is based on the ITS "Terminology" data category. This property refers to the resource providing information about the term.
It is used to associate terms with information, such as definitions. This helps to increase consistency across different parts of the documentation. It is also helpful for translation.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#terminology
</rdfs:comment>
</owl:ObjectProperty>
<!-- itsrdf:termConfidence: datatype property with range xsd:decimal; confidence (0 to 1) that the annotated unit is a term. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#termConfidence">
<rdfs:comment xml:lang="en">
The termConfidence property is based on the ITS "Terminology" data category. The value of this property is a rational number in the interval 0 to 1 (inclusive). termConfidence represents the confidence of the agents producing the annotation that the annotated unit is a term or not. 1 represents the highest level of confidence. termConfidence does not provide confidence information related to termInfoRef.
If this property is set, the termAnnotatorsRef has to be set also. See http://www.w3.org/TR/its20/#its-tool-annotation for further information.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#terminology
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#decimal" />
</owl:DatatypeProperty>
<!-- itsrdf:termAnnotatorsRef: object property identifying the processor that produced the terminology annotation; companion of itsrdf:termConfidence. -->
<owl:ObjectProperty rdf:about="http://www.w3.org/2005/11/its/rdf#termAnnotatorsRef">
<rdfs:comment xml:lang="en">
The termAnnotatorsRef property is based on the ITS "Terminology" data category.
It has to be set, if the termConfidence property is used, see http://www.w3.org/TR/its20/#its-tool-annotation for further information.
This property indicates information about the processor used to generate the terminology annotation.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#terminology
</rdfs:comment>
</owl:ObjectProperty>
<!-- itsrdf:dir: datatype property with range xsd:string; base writing direction ("ltr", "rtl", "lro", "rlo") per the ITS "Directionality" data category. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#dir">
<rdfs:comment xml:lang="en">
The dir property is based on the ITS "Directionality" data category.
This property allows the user to specify the base writing direction of blocks, embeddings, and overrides for the Unicode bidirectional algorithm. It has four possible values: "ltr", "rtl", "lro" and "rlo".
Note:
ITS defines only the values of the Directionality data category and their inheritance. The behavior of text labeled in this way may vary, according to the implementation. Implementers are encouraged, however, to model the behavior on that described in the CSS 2.1 specification or its successor. In such a case, the effect of the data category's values would correspond to the following CSS rules:
• Data category value: "ltr" (left-to-right text)
• CSS rule: *[dir="ltr"] { unicode-bidi: embed; direction: ltr}
• Data category value: "rtl" (right-to-left text)
• CSS rule: *[dir="rtl"] { unicode-bidi: embed; direction: rtl}
• Data category value: "lro" (left-to-right override)
• CSS rule: *[dir="lro"] { unicode-bidi: bidi-override; direction: ltr}
• Data category value: "rlo" (right-to-left override)
• CSS rule: *[dir="rlo"] { unicode-bidi: bidi-override; direction: rtl}
Note:
At the time of writing, enhancements are being discussed in the context of HTML5 that are expected to change the approach to marking up Directionality, in particular to support content where directionality needs to be isolated from that of surrounding content. However, these enhancements are not finalized yet. This section therefore reflects directionality markup in [HTML 4.01]; enhancements in HTML5 will be reflected in a future revision.
More information about how to use this data category is provided by [Bidi Article](http://www.w3.org/TR/its20/#bidiarticle).
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#directionality
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string" />
</owl:DatatypeProperty>
<!-- itsrdf:withinText: datatype property with range xsd:string; values "yes", "nested", "no" (ITS "Elements Within Text" data category). -->
<!-- The markup samples in the comment are escaped on purpose: a literal rdfs:comment must not contain child elements. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#withinText">
<rdfs:comment xml:lang="en">
The withinText property is based on the ITS "Elements Within Text" data category.
This property reveals if and how a resource affects the way text content behaves from a linguistic viewpoint. This information is for example relevant to provide basic text segmentation hints for tools such as translation memory systems. The values associated with this property are:
• "yes": The resource is part of the flow of its parent resource. For example like the element strong in [XHTML 1.0]:
&lt;strong&gt;Appaloosa horses&lt;/strong&gt; have spotted coats.
• "nested": The resource is part of the flow of its parent resource, its content is an independent flow. For example like the element fn in [DITA 1.0]:
Palouse horses&lt;fn&gt;A Palouse horse is the same as an Appaloosa.&lt;/fn&gt; have spotted coats.
• "no": The resource splits the text flow of its parent resource and its content is an independent text flow. For example like the element p when inside the element li in DITA or XHTML:
&lt;li&gt;Palouse horses: &lt;p&gt;They have spotted coats.&lt;/p&gt; &lt;p&gt;They have been bred by the Nez Perce.&lt;/p&gt; &lt;/li&gt;
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#elements-within-text
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string" />
</owl:DatatypeProperty>
<!-- itsrdf:domains: datatype property with range xsd:string; topic or subject of the resource (ITS "Domain" data category). -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#domains">
<rdfs:comment xml:lang="en">
The domains property is based on the ITS "Domain" data category.
This property is used to identify the topic or subject of the resource. Such information allows for more relevant linguistic choices during various processes.
Examples of usage include:
• Allowing machine translation systems to select the most appropriate engine and rules to translate the content.
• Providing a general indication of what terminology collection is most suitable for use by translators.
This data category addresses various challenges:
• Often domain-related information already exists in the document (e.g., keywords in the HTML meta element). The Domain data category provides a mechanism to point to this information.
• There are many flat or structured lists of domain related values, keywords, key phrases, classification codes, ontologies, etc. The Domain data category does not propose its own given list. Instead it provides a mapping mechanism to associate the values in the document with the values used by the consumer tool.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#domain
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string" />
</owl:DatatypeProperty>
<!-- itsrdf:taConfidence: datatype property with range xsd:decimal; confidence of the text analysis agent in its own result. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#taConfidence">
<rdfs:comment xml:lang="en">
The taConfidence property is based on the ITS "Text Analysis" data category.
This optional property implements the text analysis confidence, i.e. the confidence of the agent (that produced the annotation) in its own computation, see http://www.w3.org/TR/its20/#textAnalysis-info-pieces for further information.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#textanalysis
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#decimal" />
</owl:DatatypeProperty>
<!-- itsrdf:taAnnotatorsRef: object property identifying the processor that produced the text analysis annotation; companion of itsrdf:taConfidence. -->
<owl:ObjectProperty rdf:about="http://www.w3.org/2005/11/its/rdf#taAnnotatorsRef">
<rdfs:comment xml:lang="en">
The taAnnotatorsRef property is based on the ITS "Text Analysis" data category.
It has to be set, if the taConfidence property is used, see http://www.w3.org/TR/its20/#its-tool-annotation.
This property indicates information about the processor used to generate the text analysis annotation.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#textanalysis
</rdfs:comment>
</owl:ObjectProperty>
<!-- itsrdf:taPropRef: annotation property; its semantics are not documented in this file yet (see the TODO in its rdfs:comment). -->
<!-- NOTE(review): unlike the other rdfs:comment literals in this file, this one carries no xml:lang; confirm whether that is intended. -->
<owl:AnnotationProperty rdf:about="http://www.w3.org/2005/11/its/rdf#taPropRef">
<rdfs:comment>TODO: documentation needed</rdfs:comment>
</owl:AnnotationProperty>
<!-- itsrdf:taClassRef: annotation property holding the entity type / concept class of the text analysis target. -->
<!-- NOTE(review): most other *Ref properties in this file are owl:ObjectProperty; confirm that owl:AnnotationProperty is intended here. -->
<owl:AnnotationProperty rdf:about="http://www.w3.org/2005/11/its/rdf#taClassRef">
<rdfs:comment xml:lang="en">
The taClassRef property is based on the ITS "Text Analysis" data category.
The value of this property holds the Entity type / concept class information, i.e. the type of entity, or concept class of the text analysis target.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#textanalysis
</rdfs:comment>
</owl:AnnotationProperty>
<!-- itsrdf:taIdentRef: object property referring to the text analysis target (ITS "Text Analysis" data category). -->
<owl:ObjectProperty rdf:about="http://www.w3.org/2005/11/its/rdf#taIdentRef">
<rdfs:comment xml:lang="en">
The taIdentRef property is based on the ITS "Text Analysis" data category.
This property refers to the text analysis target, see http://www.w3.org/TR/its20/#textAnalysis-info-pieces for further information.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#textanalysis
</rdfs:comment>
</owl:ObjectProperty>
<!-- itsrdf:taIdent: datatype property with range xsd:string; identifier of the concept within a collection, used together with itsrdf:taSource. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#taIdent">
<rdfs:comment xml:lang="en">
The taIdent property is based on the ITS "Text Analysis" data category.
This property holds the identifier of the concept in the collection and is used in combination with taSource property, see http://www.w3.org/TR/its20/#textAnalysis-info-pieces for further information.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#textanalysis
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string" />
</owl:DatatypeProperty>
<!-- itsrdf:taSource: datatype property with range xsd:string; identifier of the collection source, used together with itsrdf:taIdent. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#taSource">
<rdfs:comment xml:lang="en">
The taSource property is based on the ITS "Text Analysis" data category.
This property holds the identifier of the collection source and is used in combination with the taIdent property, see http://www.w3.org/TR/its20/#textAnalysis-info-pieces for further information.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#textanalysis
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string" />
</owl:DatatypeProperty>
<!-- itsrdf:localeFilterList: datatype property with range xsd:string; comma-separated list of extended language ranges, interpreted together with itsrdf:localeFilterType. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#localeFilterList">
<rdfs:comment xml:lang="en">
The localeFilterList property is based on the ITS "Locale Filter" data category.
It specifies that a resource is only applicable to certain locales and interacts with the localeFilterType property.
This property can be used for several purposes, including, but not limited to:
• Including a legal notice only in locales for certain regions.
• Dropping editorial notes from all localized output.
The list is comma-separated and can include the wildcard extended language range "*". The list can also be empty. Whitespace surrounding language ranges is ignored.
This property interacts with the localeFilterType property in the following way:
• A single wildcard "*" with a localeFilterType "include" indicates that the resource applies to all locales.
• A single wildcard "*" with a localeFilterType "exclude" indicates that the resource applies to no locale.
• An empty string with a localeFilterType "include" indicates that the resource applies to no locale.
• An empty string with a localeFilterType "exclude" indicates that the resource applies to all locales.
• Otherwise, with a localeFilterType "include", the resource applies to the locales for which the language tag has a match in the list when using the Extended Filtering algorithm defined in [BCP47](http://www.w3.org/TR/its20/#bcp47).
• If, instead, the localeFilterType is "exclude", the resource applies to the locales for which the language tag does not have a match in the list when using the Extended Filtering algorithm defined in [BCP47](http://www.w3.org/TR/its20/#bcp47).
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#LocaleFilter
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string" />
</owl:DatatypeProperty>
<!-- itsrdf:localeFilterType: datatype property with range xsd:string; "include" or "exclude", interpreted together with itsrdf:localeFilterList. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#localeFilterType">
<rdfs:comment xml:lang="en">
The optional localeFilterType property is based on the ITS "Locale Filter" data category. Possible values are "include" or "exclude". It interacts with the localeFilterList property.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#LocaleFilter
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string" />
</owl:DatatypeProperty>
<!-- itsrdf:hasProvRecord: object property linking a resource to an itsrdf:ProvRecord (ITS "Provenance" data category). -->
<!-- The nested owl:Class in rdfs:range also declares the itsrdf:ProvRecord class used as rdfs:domain by the provenance properties. -->
<owl:ObjectProperty rdf:about="http://www.w3.org/2005/11/its/rdf#hasProvRecord">
<rdfs:comment xml:lang="en">
The hasProvRecord property is based on the ITS "Provenance" data category.
The property is used to communicate the identity of agents that have been involved in the translation of the content or the revision of the translated resource. This allows translation and translation revision consumers, such as post-editors, translation quality reviewers, or localization workflow managers, to assess how the performance of these agents may impact the quality of the translation. Translation and translation revision agents can be identified as a person, a piece of software or an organization that has been involved in providing a translation that resulted in the resource.
The provRecord offers three types of information. First, it allows identification of translation agents. Second, it allows identification of revision agents. Third, if provenance information is needed that includes temporal or sequence information about translation processes (e.g. multiple revision cycles) or requires agents that support a wider range of activities, this property offers a mechanism to refer to external provenance information.
Note:
The specification does not define the format of external provenance information, but it is recommended that an open provenance or change-logging format be used, e.g. the W3C provenance data model [PROV-DM].
Translation or translation revision tools, such as machine translation engines or computer assisted translation tools, may offer an easy way to create this information. Translation tools can then present this information to post-editors or translation workflow managers. Web applications may present such information to consumers of translated documents.
Note:
The tool related provenance and tool related revision provenance pieces of information are not meant to express information about tools used for creating ITS annotations themselves. For this purpose, ITS 2.0 provides a separate mechanism. See http://www.w3.org/TR/its20/#its-tool-annotation for details, especially http://www.w3.org/TR/its20/#annotators-ref-usage-scenarios.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#provenance
</rdfs:comment>
<rdfs:range>
<owl:Class rdf:about="http://www.w3.org/2005/11/its/rdf#ProvRecord" />
</rdfs:range>
</owl:ObjectProperty>
<!-- itsrdf:person: datatype property (domain itsrdf:ProvRecord, range xsd:string); human translation agent given as a string. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#person">
<rdfs:comment xml:lang="en">
The person property is based on the ITS "Provenance" data category.
This property holds the human provenance information, the identification of a human translation agent.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#provenance
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string" />
<rdfs:domain rdf:resource="http://www.w3.org/2005/11/its/rdf#ProvRecord" />
</owl:DatatypeProperty>
<!-- itsrdf:personRef: object property (domain itsrdf:ProvRecord); human translation agent given as a resource reference. -->
<owl:ObjectProperty rdf:about="http://www.w3.org/2005/11/its/rdf#personRef">
<rdfs:comment xml:lang="en">
The personRef property is based on the ITS "Provenance" data category.
This property holds the human provenance information, the identification of a human translation agent.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#provenance
</rdfs:comment>
<rdfs:domain rdf:resource="http://www.w3.org/2005/11/its/rdf#ProvRecord" />
</owl:ObjectProperty>
<!-- itsrdf:org: datatype property (domain itsrdf:ProvRecord, range xsd:string); translating organization given as a string. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#org">
<rdfs:comment xml:lang="en">
The org property is based on the ITS "Provenance" data category.
This property holds the organizational provenance information, the identification of an organization acting as a translation agent.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#provenance
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string" />
<rdfs:domain rdf:resource="http://www.w3.org/2005/11/its/rdf#ProvRecord" />
</owl:DatatypeProperty>
<!-- itsrdf:orgRef: object property (domain itsrdf:ProvRecord); translating organization given as a resource reference. -->
<owl:ObjectProperty rdf:about="http://www.w3.org/2005/11/its/rdf#orgRef">
<rdfs:comment xml:lang="en">
The orgRef property is based on the ITS "Provenance" data category.
This property holds the organizational provenance information, the identification of an organization acting as a translation agent.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#provenance
</rdfs:comment>
<rdfs:domain rdf:resource="http://www.w3.org/2005/11/its/rdf#ProvRecord" />
</owl:ObjectProperty>
<!-- itsrdf:tool: datatype property (domain itsrdf:ProvRecord, range xsd:string); translation software tool given as a string. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#tool">
<rdfs:comment xml:lang="en">
The tool property is based on the ITS "Provenance" data category.
This property holds the tool-related provenance information, the identification of a software tool that was used in translating the resource.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#provenance
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string" />
<rdfs:domain rdf:resource="http://www.w3.org/2005/11/its/rdf#ProvRecord" />
</owl:DatatypeProperty>
<!-- itsrdf:toolRef: object property (domain itsrdf:ProvRecord); translation software tool given as a resource reference. -->
<owl:ObjectProperty rdf:about="http://www.w3.org/2005/11/its/rdf#toolRef">
<rdfs:comment xml:lang="en">
The toolRef property is based on the ITS "Provenance" data category.
This property holds the tool-related provenance information, the identification of a software tool that was used in translating the resource.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#provenance
</rdfs:comment>
<rdfs:domain rdf:resource="http://www.w3.org/2005/11/its/rdf#ProvRecord" />
</owl:ObjectProperty>
<!-- itsrdf:revPerson: datatype property (domain itsrdf:ProvRecord, range xsd:string); human translation revision agent given as a string. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#revPerson">
<rdfs:comment xml:lang="en">
The revPerson property is based on the ITS "Provenance" data category.
This property holds the human revision provenance information, the identification of a human translation revision agent.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#provenance
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string" />
<rdfs:domain rdf:resource="http://www.w3.org/2005/11/its/rdf#ProvRecord" />
</owl:DatatypeProperty>
<!-- itsrdf:revPersonRef: object property (domain itsrdf:ProvRecord); human translation revision agent given as a resource reference. -->
<owl:ObjectProperty rdf:about="http://www.w3.org/2005/11/its/rdf#revPersonRef">
<rdfs:comment xml:lang="en">
The revPersonRef property is based on the ITS "Provenance" data category.
This property holds the human revision provenance information, the identification of a human translation revision agent.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#provenance
</rdfs:comment>
<rdfs:domain rdf:resource="http://www.w3.org/2005/11/its/rdf#ProvRecord" />
</owl:ObjectProperty>
<!-- itsrdf:revOrg: datatype property (domain itsrdf:ProvRecord, range xsd:string); revising organization given as a string. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#revOrg">
<rdfs:comment xml:lang="en">
The revOrg property is based on the ITS "Provenance" data category.
This property holds the organizational revision provenance information, the identification of an organization acting as a translation revision agent.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#provenance
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string" />
<rdfs:domain rdf:resource="http://www.w3.org/2005/11/its/rdf#ProvRecord" />
</owl:DatatypeProperty>
<!-- itsrdf:revOrgRef: object property (domain itsrdf:ProvRecord); revising organization given as a resource reference. -->
<owl:ObjectProperty rdf:about="http://www.w3.org/2005/11/its/rdf#revOrgRef">
<rdfs:comment xml:lang="en">
The revOrgRef property is based on the ITS "Provenance" data category.
This property holds the organizational revision provenance information, the identification of an organization acting as a translation revision agent.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#provenance
</rdfs:comment>
<rdfs:domain rdf:resource="http://www.w3.org/2005/11/its/rdf#ProvRecord" />
</owl:ObjectProperty>
<!-- itsrdf:revTool: datatype property (domain itsrdf:ProvRecord, range xsd:string); revision software tool given as a string. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#revTool">
<rdfs:comment xml:lang="en">
The revTool property is based on the ITS "Provenance" data category.
This property holds the tool-related revision provenance information, the identification of a software tool that was used in revising the translation of the resource.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#provenance
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string" />
<rdfs:domain rdf:resource="http://www.w3.org/2005/11/its/rdf#ProvRecord" />
</owl:DatatypeProperty>
<!-- revToolRef (ITS "Provenance"): object-property form of the revision-tool identification.
     Declared with domain ProvRecord; no rdfs:range is declared here. -->
<owl:ObjectProperty rdf:about="http://www.w3.org/2005/11/its/rdf#revToolRef">
<rdfs:comment xml:lang="en">
The revToolRef property is based on the ITS "Provenance" data category.
This property holds the tool-related revision provenance information, the identification of a software tool that was used in revising the translation of the resource.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#provenance
</rdfs:comment>
<rdfs:domain rdf:resource="http://www.w3.org/2005/11/its/rdf#ProvRecord" />
</owl:ObjectProperty>
<!-- provRef (ITS "Provenance"): object property referencing external provenance information.
     Declared with domain ProvRecord; no rdfs:range is declared here. -->
<owl:ObjectProperty rdf:about="http://www.w3.org/2005/11/its/rdf#provRef">
<rdfs:comment xml:lang="en">
The provRef property is based on the ITS "Provenance" data category.
This property holds the reference to external provenance descriptions, a reference to external provenance information.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#provenance
</rdfs:comment>
<rdfs:domain rdf:resource="http://www.w3.org/2005/11/its/rdf#ProvRecord" />
</owl:ObjectProperty>
<!-- externalResourceRef (ITS "External Resource"): object property pointing at externally held, potentially translatable data.
     Neither rdfs:domain nor rdfs:range is declared here. -->
<owl:ObjectProperty rdf:about="http://www.w3.org/2005/11/its/rdf#externalResourceRef">
<rdfs:comment xml:lang="en">
The externalResourceRef property is based on the ITS "External Resource" data category.
This property indicates that a resource represents or references potentially translatable data in a resource outside the resource. Examples of such resources are external images and audio or video files.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#externalresource
</rdfs:comment>
</owl:ObjectProperty>
<!-- target (ITS "Target Pointer"): datatype property associating source content with its corresponding target content.
     Neither rdfs:domain nor rdfs:range is declared here. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#target">
<rdfs:comment xml:lang="en">
The target property is based on the ITS "Target Pointer" data category.
Some formats, such as those designed for localization or for multilingual resources, hold the same content in different languages inside a single resource. This property is used to associate the resource of a given source content (i.e., the content to be translated) and the resource of its corresponding target content (i.e., the source content translated into a given target language).
This specification makes no provision regarding the presence of the target resources or their content: A target resource may or may not exist and it may or may not have content.
This property can be used for several purposes, including but not limited to:
• Extract the source resource to translate and put back the translation at its proper location.
• Compare source and target resource for quality verification.
• Reuse existing translations when localizing the new version of an existing resource.
• Access aligned bi-lingual content to build memories, or to train machine translation engines.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#target-pointer
</rdfs:comment>
</owl:DatatypeProperty>
<!-- id (ITS "ID Value"): datatype property holding a unique identifier for a part of the content.
     Declared with range xsd:string; no rdfs:domain is declared here. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#id">
<rdfs:comment xml:lang="en">
The id property is based on the ITS "ID Value" data category.
This property indicates a value that can be used as unique identifier for a given part of the content.
Providing a unique identifier that is maintained in the original document can be useful for several purposes, for example:
• Allow automated alignment between different versions of the source resource, or between source and translated resources.
• Improve the confidence in leveraged translation for exact matches.
• Provide backtracking information between displayed text and source material when testing or debugging.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#idvalue
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string" />
</owl:DatatypeProperty>
<!-- space (ITS "Preserve Space"): datatype property stating how whitespace is to be handled.
     Declared with range xsd:string; the values "default" and "preserve" are named only in the comment text, the range does not restrict to them.
     No rdfs:domain is declared here. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#space">
<rdfs:comment xml:lang="en">
The space property is based on the ITS "Preserve Space" data category.
The value of this property indicates how whitespace is to be handled in content. The possible values for this property are "default" and "preserve" and carry the same meaning as the corresponding values of the xml:space attribute. The default value is "default". The Preserve Space data category does not apply to HTML documents in HTML syntax.
The xml:space attribute, as defined in section 2.10 of [XML 1.0], maps exactly to the Preserve Space data category.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#preservespace
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string" />
</owl:DatatypeProperty>
<!-- hasLocQualityIssue (ITS "Localization Quality Issue"): object property whose range is the LocQualityIssue class.
     The range is written as a nested owl:Class node element rather than an rdf:resource attribute, so this block
     also states that LocQualityIssue is an owl:Class. No rdfs:domain is declared here. -->
<owl:ObjectProperty rdf:about="http://www.w3.org/2005/11/its/rdf#hasLocQualityIssue">
<rdfs:comment xml:lang="en">
The hasLocQualityIssue property is based on the ITS "Localization Quality Issue" data category.
This property is used to express information related to localization quality assessment tasks. Such tasks can be conducted on the translation of some source content (such as a text or an image) into a target language or on the source content itself where its quality may impact on the localization process.
Note:
Automated or manual quality assessment is one area of quality management for translation and localization. An example of existing quality assessment is in-country review (e.g., as part of a language acceptance test for software). An important part of quality assessment is the list of issue types that are being used. Very often, simple issue categories like "correct/incorrect" or "like/dislike" are inadequate; instead, more specific ones such as "terminology" or "grammar" are more helpful in identifying concrete reasons for quality problems and for obtaining a more objective picture of quality levels.
Non-normative terminology related to localization quality as used in this section is provided in Appendix H: Localization Quality Guidance, see http://www.w3.org/TR/its20/#localization-quality-guidance.
This property can be used in a number of ways, including the following example scenarios:
• A human reviewer working with a web-based tool adds quality markup manually in a text editor, including comments and suggestions, to localized content as part of the review process. A subsequent process examines this markup to ensure that changes were made.
• A fully automatic quality checking tool flags a number of potential quality issues in an XML or HTML file and marks them up using ITS 2.0 markup. A human reviewer then uses another tool to examine this markup and decide whether the file needs to receive more extensive review or be passed on for further processing without a further manual review stage.
• A quality assessment process identifies a number of issues and adds the ITS markup to a rendered HTML preview of an XML file along with CSS styling that highlights these issues. The resulting HTML file is then sent back to the translator to assist his or her revision efforts.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#lqissue
</rdfs:comment>
<rdfs:range>
<owl:Class rdf:about="http://www.w3.org/2005/11/its/rdf#LocQualityIssue" />
</rdfs:range>
</owl:ObjectProperty>
<!-- locQualityIssueType (ITS "Localization Quality Issue"): datatype property holding the issue-type classifier.
     Declared with domain LocQualityIssue and range xsd:string; the allowed type values are only linked from the comment text. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#locQualityIssueType">
<rdfs:comment xml:lang="en">
The locQualityIssueType property is based on the ITS "Localization Quality Issue" data category.
The value of this property holds the type information, a classifier that groups similar issues into categories (for example to differentiate spelling errors from grammar errors). See http://www.w3.org/TR/its20/#lqissue-typevalues for a list of allowed values.
ITS 2.0-compliant tools that use these types MUST map their internal values to these types. If the type of the issue is set to uncategorized, a comment MUST be specified as well.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#lqissue
</rdfs:comment>
<rdfs:domain rdf:resource="http://www.w3.org/2005/11/its/rdf#LocQualityIssue" />
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string" />
</owl:DatatypeProperty>
<!-- locQualityIssueComment (ITS "Localization Quality Issue"): datatype property holding a human-readable description of an issue.
     Declared with range xsd:string and domain LocQualityIssue. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#locQualityIssueComment">
<rdfs:comment xml:lang="en">
The locQualityIssueComment property is based on the ITS "Localization Quality Issue" data category.
The value of this property holds the comment information, a human-readable description of a specific instance of a quality issue.
Comments can be used to explain an issue or provide guidance in addressing an issue. For example, a note about a Terminology issue might specify what term should be used.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#lqissue
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string" />
<rdfs:domain rdf:resource="http://www.w3.org/2005/11/its/rdf#LocQualityIssue" />
</owl:DatatypeProperty>
<!-- locQualityIssueSeverity (ITS "Localization Quality Issue"): datatype property holding the severity of an issue.
     Declared with domain LocQualityIssue and range xsd:decimal; the 0 to 100 interval is stated only in the comment text,
     the declared range does not enforce it. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#locQualityIssueSeverity">
<rdfs:comment xml:lang="en">
The locQualityIssueSeverity property is based on the ITS "Localization Quality Issue" data category.
The value of this property holds the severity information, a classifier for the seriousness of an issue. The seriousness depends on the Quality Model that is being applied. The Quality Model should be made explicit via the Profile Reference.
The value has to be a rational number in the interval 0 to 100 (inclusive). The higher values represent greater severity.
It is up to tools to map the values allowed by ITS 2.0 to their own system’s scale.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#lqissue
</rdfs:comment>
<rdfs:domain rdf:resource="http://www.w3.org/2005/11/its/rdf#LocQualityIssue" />
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#decimal" />
</owl:DatatypeProperty>
<!-- locQualityIssueProfileRef (ITS "Localization Quality Issue"): object property referencing the quality assessment model or profile used for an issue.
     Declared with domain LocQualityIssue; no rdfs:range is declared here. -->
<owl:ObjectProperty rdf:about="http://www.w3.org/2005/11/its/rdf#locQualityIssueProfileRef">
<rdfs:comment xml:lang="en">
The locQualityIssueProfileRef property is based on the ITS "Localization Quality Issue" data category.
The value of this property holds the profile reference information, a description of the quality assessment model (or a specific profile (customization/instantiation) of a model, where relevant) used for the issue.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#lqissue
</rdfs:comment>
<rdfs:domain rdf:resource="http://www.w3.org/2005/11/its/rdf#LocQualityIssue" />
</owl:ObjectProperty>
<!-- locQualityIssueEnabled (ITS "Localization Quality Issue"): datatype property flagging whether an issue is enabled.
     Declared with domain LocQualityIssue and range xsd:string: the flag is carried as the strings "yes" / "no"
     named in the comment text, not as xsd:boolean. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#locQualityIssueEnabled">
<rdfs:comment xml:lang="en">
The locQualityIssueEnabled property is based on the ITS "Localization Quality Issue" data category.
The value of this property holds the enabled information, a flag indicating whether the issue is enabled or not.
Possible values are "yes" or "no", with the default value being "yes".
This flag is used to activate or deactivate issues. There is no prescribed behavior associated with activated or deactivated issues. One example of usage is a tool that allows the user to deactivate false positives so they are not displayed again each time the document is re-checked.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#lqissue
</rdfs:comment>
<rdfs:domain rdf:resource="http://www.w3.org/2005/11/its/rdf#LocQualityIssue" />
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string" />
</owl:DatatypeProperty>
<!-- locQualityRatingScore (ITS "Localization Quality Rating"): datatype property holding a quality score.
     Declared with range xsd:decimal; the 0 to 100 interval is stated only in the comment text. No rdfs:domain is declared here. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#locQualityRatingScore">
<rdfs:comment xml:lang="en">
The locQualityRatingScore property is based on the ITS "Localization Quality Rating" data category. It is used to express an overall measurement of the localization quality of a resource.
This property allows to specify a quality score for a given item or document.
Its value is a rational number in the interval 0 to 100 (inclusive). The higher values represent better quality.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#lqrating
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#decimal" />
</owl:DatatypeProperty>
<!-- locQualityRatingScoreThreshold (ITS "Localization Quality Rating"): datatype property holding the lowest passing score.
     Declared with range xsd:decimal; the 0 to 100 interval is stated only in the comment text. No rdfs:domain is declared here. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#locQualityRatingScoreThreshold">
<rdfs:comment xml:lang="en">
The locQualityRatingScoreThreshold property is based on the ITS "Localization Quality Rating" data category. It interacts with the locQualityRatingScore property.
The value of this property indicates the lowest score that constitutes a passing score in the profile used. Its value is a rational number in the interval 0 to 100 (inclusive).
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#lqrating
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#decimal" />
</owl:DatatypeProperty>
<!-- locQualityRatingVote (ITS "Localization Quality Rating"): datatype property holding a voting result.
     Declared with range xsd:integer; no rdfs:domain is declared here. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#locQualityRatingVote">
<rdfs:comment xml:lang="en">
The locQualityRatingVote property is based on the ITS "Localization Quality Rating" data category. It is used to express an overall measurement of the localization quality of a resource.
This property allows to specify a voting result for a given item or document.
Its value is a signed integer with higher values indicating a better vote.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#lqrating
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#integer" />
</owl:DatatypeProperty>
<!-- locQualityRatingVoteThreshold (ITS "Localization Quality Rating"): datatype property holding the lowest passing vote.
     Declared with range xsd:integer; no rdfs:domain is declared here. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#locQualityRatingVoteThreshold">
<rdfs:comment xml:lang="en">
The locQualityRatingVoteThreshold property is based on the ITS "Localization Quality Rating" data category. It interacts with the locQualityRatingVote property.
The value of this property indicates the lowest value that constitutes a passing vote in the profile used. Its value is a signed integer.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#lqrating
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#integer" />
</owl:DatatypeProperty>
<!-- locQualityRatingProfileRef (ITS "Localization Quality Rating"): object property pointing to the resource that describes the quality assessment model used for scoring.
     Neither rdfs:domain nor rdfs:range is declared here. -->
<owl:ObjectProperty rdf:about="http://www.w3.org/2005/11/its/rdf#locQualityRatingProfileRef">
<rdfs:comment xml:lang="en">
The locQualityRatingProfileRef property is based on the ITS "Localization Quality Rating" data category.
It points to the reference resource describing the quality assessment model used for the scoring.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#lqrating
</rdfs:comment>
</owl:ObjectProperty>
<!-- mtConfidence (ITS "MT Confidence"): datatype property holding a machine translation engine's confidence score.
     Declared with range xsd:decimal; the 0 to 1 interval and the required companion property mtConfidenceAnnotatorsRef
     are stated only in the comment text. No rdfs:domain is declared here. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#mtConfidence">
<rdfs:comment xml:lang="en">
The mtConfidence property is based on the ITS "MT Confidence" data category.
This property is used to communicate the confidence score from a machine translation engine for the accuracy of a translation it has provided. It is not intended to provide a score that is comparable between machine translation engines and platforms. This data category does NOT aim to establish any sort of correlation between the confidence score and either human evaluation of MT usefulness, or post-editing cognitive effort. For harmonization’s sake, MT Confidence is provided as a rational number in the interval 0 to 1 (inclusive).
Note:
Implementers are expected to interpret the floating-point number and present it to human and other consumers in a convenient form, such as percentage (0-100%) with up to 2 decimal digits, font or background color coding, etc.
Note:
The value provided by the MT Confidence data category can be 1) the quality score of the translation as produced by an MT engine, or 2) a quality estimation score that uses both MT-system-internal features and additional external features. For this reason it is important that MT Confidence provides additional information about the MT engine (via the annotatorsRef property). Otherwise the score on its own is hard to interpret and to reuse. In the case of 2), MT Confidence potentially conveys information about any additional tools that were used in deriving the score.
This property can be used for several purposes, including, but not limited to:
• Automated prioritising of raw machine translated text for further processing based on empirically set thresholds.
• Providing readers, translators, post-editors, reviewers, and proof-readers of machine translated text with self-reported relative accuracy prediction.
MT confidence scores can be displayed e.g., on websites machine translated on the fly, by simple web-based translation editors or by Computer Aided Translation (CAT) tools.
The value of this property represents the translation confidence score as a rational number in the interval 0 to 1 (inclusive).
If this property is set, the mtConfidenceAnnotatorsRef has to be set also. See http://www.w3.org/TR/its20/#its-tool-annotation for further information.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#mtconfidence
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#decimal" />
</owl:DatatypeProperty>
<!-- mtConfidenceAnnotatorsRef (ITS "MT Confidence"): object property referring to the processor that generated the MT annotation.
     The comment text requires it whenever mtConfidence is used. Neither rdfs:domain nor rdfs:range is declared here. -->
<owl:ObjectProperty rdf:about="http://www.w3.org/2005/11/its/rdf#mtConfidenceAnnotatorsRef">
<rdfs:comment xml:lang="en">
The mtConfidenceAnnotatorsRef property is based on the ITS "MT Confidence" data category.
This property has to be set, if the mtConfidence property is used, see http://www.w3.org/TR/its20/#its-tool-annotation for further information.
This property refers to the processor used to generate the mt annotation.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#mtconfidence
</rdfs:comment>
</owl:ObjectProperty>
<!-- allowedCharacters (ITS "Allowed Characters"): datatype property holding a character-class regular expression
     that lists the characters permitted in a piece of content.
     Declared with range xsd:string; the charClass grammar is given only in the comment text. No rdfs:domain is declared here.
     NOTE(review): the entity references in the example list (&#x0061;, &lt;, &quot;) are resolved by the XML parser,
     so consumers of this literal see the decoded characters although the list is labelled "shown as XML source".
     Confirm whether those examples should be double-escaped. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#allowedCharacters">
<rdfs:comment xml:lang="en">
The allowedCharacters property is based on the ITS "Allowed Characters" data category.
This property is used to specify the characters that are permitted in a given piece of content.
It can be used for various purposes, including the following examples:
• Limiting the characters that may be used in the UI of a game due to font restrictions.
• Preventing illegal characters from being entered as text content that represents file or directory names.
• Controlling what characters can be used when translating examples of a login name in content.
Note:
The Allowed Characters data category is not intended to disallow HTML markup. The purpose is to restrict the content to various characters only, e.g., when the content is to be used for URL or filename generation. In most Content Management Systems, content is divided into several fields, some of which may be restricted to plain text, while in other fields HTML fragments may be allowed. Enforcing such restrictions is outside the scope of this property.
The set of characters that are allowed is specified using a regular expression. That is, each character in the selected content MUST be included in the set specified by the regular expression.
The regular expression is the character class construct charClass defined as follows:
[1] charClass ::= singleCharEsc | charClassExpr | wildcardEsc
[2] singleCharEsc ::= '\' [nrt\|.?*+(){}#x2D#x5B#x5D#x5E]
[3] charClassExpr ::= '[' charGroup ']'
[4] charGroup ::= posCharGroup | negCharGroup
[5] posCharGroup ::= ( charRange | singleCharEsc )+
[6] charRange ::= seRange | xmlCharIncDash
[7] seRange ::= charOrEsc '-' charOrEsc
[8] charOrEsc ::= xmlChar | singleCharEsc
[9] xmlChar ::= [^\#x2D#x5B#x5D]
[10] xmlCharIncDash ::= [^\#x5B#x5D]
[11] negCharGroup ::= '^' posCharGroup
[12] wildcardEsc ::= '.'
The . metacharacter also matches CARRIAGE RETURN (U+000D) and LINE FEED (U+000A). That is, the dot-all option is set.
This construct is a sub-set of the Character Classes construct of XML Schema [XML Schema Part 2] and is compatible with most other regular expression engines.
Note:
Users may want to use a regular expression to make sure that they follow the definition given above. Sample regular expressions to verify the regular expression in allowed characters are provided: for XML (see http://www.w3.org/TR/its20/examples/allowed-characters-verify-xml-regex.txt) and for Java (see http://www.w3.org/TR/its20/examples/allowed-characters-verify-java-regex.txt).
Example of expressions (shown as XML source):
• "[abc]": allows the characters 'a', 'b' and 'c'.
• "[a-c]": allows the characters 'a', 'b' and 'c'.
• "[a-zA-Z]": allows the characters from 'a' to 'z' and from 'A' to 'Z'.
• "[^abc]": allows any characters except 'a', 'b', and 'c'.
• "[^&#x0061;-c]": allows any characters except 'a', 'b', and 'c'.
• "[^&lt;>:&quot;\\/|\?*]": allows only the characters valid for Windows file names.
• ".": allows any character.
• "": allows no character.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#allowedchars
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string" />
</owl:DatatypeProperty>
<!-- storageSize (ITS "Storage Size"): datatype property giving the maximum storage size of content, in bytes.
     Declared with range xsd:nonNegativeInteger; no rdfs:domain is declared here.
     The comment text describes it as used together with the encoding and line break type (see storageEncoding and lineBreakType). -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#storageSize">
<rdfs:comment xml:lang="en">
The storageSize property is based on the ITS "Storage Size" data category.
This property is used to specify the maximum storage size of a given content.
It defines the maximum number of bytes the text of the selected node is allowed in storage.
It can be used for various purposes, including the following examples:
• Verify during translation if a string fits into a fixed-size database field.
• Control the size of a string that is stored in a fixed-size memory buffer at run-time.
The storage size is always expressed in bytes and excludes any leading Byte-Order-Markers. It is provided along with the character encoding and the line break type that will be used when the content is stored. If the encoding form does not use the byte as its unit (e.g. UTF-16 uses 16-bit code units) the storage size MUST still be given in byte (e.g., for UTF-16: 2 bytes per 16-bit code unit).
An application verifying the storage size for a given content is expected to perform the following steps:
• All the LINE FEED (U+000A) characters of the content to verify are replaced by the character or characters specified by the line break type.
• The resulting string is converted to an array of bytes using a character encoder for the specified encoding. If a character cannot be represented with the specified encoding, an error is generated.
• If the leading bytes represent a Byte-Order-Mark, they are stripped from that array.
• The length of the resulting array is compared to the storage size provided. The content is too long if the length is greater than the storage size.
Note:
Storage size is not directly related to the display length of a text, and therefore is not intended as a display length constraint mechanism.
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#storagesize
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#nonNegativeInteger" />
</owl:DatatypeProperty>
<!-- storageEncoding (ITS "Storage Size"): datatype property naming the character encoding used to count the bytes of the text.
     Declared with range xsd:string; the default "UTF-8" is stated only in the comment text. No rdfs:domain is declared here. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#storageEncoding">
<rdfs:comment xml:lang="en">
The storageEncoding property is based on the ITS "Storage Size" data category.
The value of this property holds the name of the character encoding used to calculate the number of bytes of the selected text. The name MUST be one of the names or aliases listed in the IANA Character Sets registry [IANA Character Sets]. The default value is "UTF-8".
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#storagesize
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string" />
</owl:DatatypeProperty>
<!-- lineBreakType (ITS "Storage Size"): datatype property stating which line break type the storage uses.
     Declared with range xsd:string; the values "cr", "lf", "crlf" and the default "lf" are stated only in the comment text.
     No rdfs:domain is declared here. -->
<owl:DatatypeProperty rdf:about="http://www.w3.org/2005/11/its/rdf#lineBreakType">
<rdfs:comment xml:lang="en">
The lineBreakType property is based on the ITS "Storage Size" data category.
The value of this property indicates what type of line breaks the storage uses. The possible values are: "cr" for CARRIAGE RETURN (U+000D), "lf" for LINE FEED (U+000A), or "crlf" for CARRIAGE RETURN (U+000D) followed by LINE FEED (U+000A). The default value is "lf".
Definition in the ITS 2.0 specification: see http://www.w3.org/TR/its20/#storagesize
</rdfs:comment>
<rdfs:range rdf:resource="http://www.w3.org/2001/XMLSchema#string" />
</owl:DatatypeProperty>
<!-- Below are some example instances making use of the ontology.
See the HTML input document from which the instances are generated
at http://www.w3.org/TR/its20/#EX-HTML-whitespace-normalization -->
<!-- Context resource: the full example text (29 characters) with a link to its source document. -->
<rdf:Description rdf:about="http://example.com/exampledoc.html#char=0,29">
<nif:sourceUrl rdf:resource="http://example.com/exampledoc.html"/>
<nif:isString>Welcome to Dublin in Ireland!</nif:isString>
<rdf:type rdf:resource="http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#Context"/>
</rdf:Description>
<!-- Same subject IRI as the context resource: the string covering the whole text, annotated translate="yes"
     and referencing itself as its context. -->
<rdf:Description rdf:about="http://example.com/exampledoc.html#char=0,29">
<rdf:type rdf:resource="http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#RFC5147String"/>
<nif:anchorOf>Welcome to Dublin in Ireland!</nif:anchorOf>
<itsrdf:translate>yes</itsrdf:translate>
<itsrdf:withinText>no</itsrdf:withinText>
<nif:referenceContext rdf:resource="http://example.com/exampledoc.html#char=0,29"/>
</rdf:Description>
<!-- XPath-addressed h2 node; its nif:convertedFrom points at the char=0,29 string. -->
<rdf:Description rdf:about="http://example.com/exampledoc.html#xpath(/html/body%5B1%5D/h2%5B1%5D)">
<nif:convertedFrom rdf:resource="http://example.com/exampledoc.html#char=0,29"/>
</rdf:Description>
<!-- Substring "Dublin" (offsets 11 to 17 of the context text): not translatable, within text,
     and linked to a DBpedia resource via taIdentRef. -->
<rdf:Description rdf:about="http://example.com/exampledoc.html#char=11,17">
<rdf:type rdf:resource="http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#RFC5147String"/>
<nif:anchorOf>Dublin</nif:anchorOf>
<itsrdf:translate>no</itsrdf:translate>
<itsrdf:withinText>yes</itsrdf:withinText>
<itsrdf:taIdentRef rdf:resource="http://dbpedia.org/resource/Dublin"/>
<nif:referenceContext rdf:resource="http://example.com/exampledoc.html#char=0,29"/>
</rdf:Description>
<!-- XPath-addressed span node inside the h2; its nif:convertedFrom points at the char=11,17 string. -->
<rdf:Description rdf:about="http://example.com/exampledoc.html#xpath(/html/body%5B1%5D/h2%5B1%5D/span%5B1%5D)">
<nif:convertedFrom rdf:resource="http://example.com/exampledoc.html#char=11,17"/>
</rdf:Description>
<!-- Substring "Ireland" (offsets 21 to 28 of the context text): not translatable, within text. -->
<rdf:Description rdf:about="http://example.com/exampledoc.html#char=21,28">
<rdf:type rdf:resource="http://persistence.uni-leipzig.org/nlp2rdf/ontologies/nif-core#RFC5147String"/>
<nif:anchorOf>Ireland</nif:anchorOf>
<itsrdf:translate>no</itsrdf:translate>
<itsrdf:withinText>yes</itsrdf:withinText>
<nif:referenceContext rdf:resource="http://example.com/exampledoc.html#char=0,29"/>
</rdf:Description>
<!-- XPath-addressed b node inside the h2; its nif:convertedFrom points at the char=21,28 string. -->
<rdf:Description rdf:about="http://example.com/exampledoc.html#xpath(/html/body%5B1%5D/h2%5B1%5D/b%5B1%5D)">
<nif:convertedFrom rdf:resource="http://example.com/exampledoc.html#char=21,28"/>
</rdf:Description>
</rdf:RDF>