forked from knmnyn/hugo-blox
-
Notifications
You must be signed in to change notification settings - Fork 18
/
publications.bib
409 lines (386 loc) · 31.7 KB
/
publications.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
@inproceedings{10.1145/1378889.1378921,
  author = {Zhao, Jin and Kan, Min-Yen and Theng, Yin Leng},
  title = {Math information retrieval: user requirements and prototype implementation},
  year = {2008},
  isbn = {9781595939982},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/1378889.1378921},
  doi = {10.1145/1378889.1378921},
  abstract = {We report on the user requirements study and preliminary implementation phases in creating a digital library that indexes and retrieves educational materials on math. We first review the current approaches and resources for math retrieval, then report on the interviews of a small group of potential users to properly ascertain their needs. While preliminary, the results suggest that meta-search and resource categorization are two basic requirements for a math search engine. In addition, we implement a prototype categorization system and show that the generic features work well in identifying the math contents from the webpage but perform less well at categorizing them. We discuss our long term goals, where we plan to investigate how math expressions and text search may be best integrated.},
  booktitle = {Proceedings of the 8th ACM/IEEE-CS Joint Conference on Digital Libraries},
  pages = {187--196},
  numpages = {10},
  keywords = {web classification, user requirement analysis, niche search engines, math information retrieval, interaction histories},
  location = {Pittsburgh, PA, USA},
  series = {JCDL '08}
}
@inproceedings{10.5555/1983222.1983288,
  author = {Theng, Yin-Leng and Ei Tun, Ei and Zaw, Ma May Htoo and Cho, Seint Yee Yee and Miao, Chunyan and Kan, Min-Yen and Tang, Ai Chee},
  title = {An empirical study of students' perceptions on e-learning systems},
  year = {2008},
  publisher = {Singapore Therapeutic, Assistive \& Rehabilitative Technologies (START) Centre},
  address = {Midview City, SGP},
  abstract = {As e-learning becomes increasingly popular in learning, it is necessary to evaluate students' perceptions on e-learning systems to enhance the quality of the systems. This paper describes a survey on a group of 451 students from a local university using edveNTUre, a proprietary e-learning system powered by Blackboard. Using the Technology Acceptance Model (TAM), the study aimed to investigate the factors leading to perceived usefulness, perceived ease of use and behavioural intention to use the e-learning system. This paper concludes with a discussion on the interaction design of e-learning systems from students' perspectives.},
  booktitle = {Proceedings of the 2nd International Convention on Rehabilitation Engineering \& Assistive Technology},
  pages = {245--249},
  numpages = {5},
  keywords = {usefulness, usability, technology acceptance model, interface design patterns, e-learning},
  location = {Bangkok, Thailand},
  series = {iCREATe '08}
}
@article{10.1145/1314215.1314231,
  author = {Kan, Min-Yen and Tan, Yee Fan},
  title = {Record matching in digital library metadata},
  year = {2008},
  issue_date = {February 2008},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  volume = {51},
  number = {2},
  issn = {0001-0782},
  url = {https://doi.org/10.1145/1314215.1314231},
  doi = {10.1145/1314215.1314231},
  abstract = {Using evidence from external sources to create more accurate matching systems.},
  journal = {Communications of the ACM},
  month = feb,
  pages = {91--94},
  numpages = {4}
}
@article{10.1109/TASL.2007.911559,
  author = {Kan, Min-Yen and Wang, Ye and Iskandar, D. and New, Tin Lay and Shenoy, A.},
  title = {{LyricAlly}: Automatic Synchronization of Textual Lyrics to Acoustic Music Signals},
  year = {2008},
  issue_date = {February 2008},
  publisher = {IEEE Press},
  volume = {16},
  number = {2},
  issn = {1558-7916},
  url = {https://doi.org/10.1109/TASL.2007.911559},
  doi = {10.1109/TASL.2007.911559},
  abstract = {We present LyricAlly, a prototype that automatically aligns acoustic musical signals with their corresponding textual lyrics, in a manner similar to manually-aligned karaoke. We tackle this problem based on a multimodal approach, using an appropriate pairing of audio and text processing to create the resulting prototype. LyricAlly's acoustic signal processing uses standard audio features but constrained and informed by the musical nature of the signal. The resulting detected hierarchical rhythm structure is utilized in singing voice detection and chorus detection to produce results of higher accuracy and lower computational costs than their respective baselines. Text processing is employed to approximate the length of the sung passages from the lyrics. Results show an average error of less than one bar for per-line alignment of the lyrics on a test bed of 20 songs (sampled from CD audio and carefully selected for variety). We perform a comprehensive set of system-wide and per-component tests and discuss their results. We conclude by outlining steps for further development.},
  journal = {IEEE Transactions on Audio, Speech, and Language Processing},
  month = feb,
  pages = {338--349},
  numpages = {12},
  keywords = {Acoustic signal detection, acoustic signal processing, music, text processing}
}
@inproceedings{10.5555/1780653.1780707,
  author = {Nguyen, Thuy Dung and Kan, Min-Yen},
  title = {Keyphrase extraction in scientific publications},
  year = {2007},
  isbn = {3540770933},
  publisher = {Springer-Verlag},
  address = {Berlin, Heidelberg},
  abstract = {We present a keyphrase extraction algorithm for scientific publications. Different from previous work, we introduce features that capture the positions of phrases in document with respect to logical sections found in scientific discourse. We also introduce features that capture salient morphological phenomena found in scientific keyphrases, such as whether a candidate keyphrase is an acronyms or uses specific terminologically productive suffixes. We have implemented these features on top of a baseline feature set used by Kea [1]. In our evaluation using a corpus of 120 scientific publications multiply annotated for keyphrases, our system significantly outperformed Kea at the $p < .05$ level. As we know of no other existing multiply annotated keyphrase document collections, we have also made our evaluation corpus publicly available. We hope that this contribution will spur future comparative research.},
  booktitle = {Proceedings of the 10th International Conference on Asian Digital Libraries: Looking Back 10 Years and Forging New Frontiers},
  pages = {317--326},
  numpages = {10},
  location = {Hanoi, Vietnam},
  series = {ICADL'07}
}
@inproceedings{10.1145/1316902.1316922,
  author = {Elmacioglu, Ergin and Kan, Min-Yen and Lee, Dongwon and Zhang, Yi},
  title = {Web based linkage},
  year = {2007},
  isbn = {9781595938299},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/1316902.1316922},
  doi = {10.1145/1316902.1316922},
  abstract = {When a variety of names are used for the same real-world entity, the problem of detecting all such variants has been known as the (record) linkage or entity resolution problem. In this paper, toward this problem, we propose a novel approach that uses the Web as the collective knowledge source in addition to contents of entities. Our hypothesis is that if an entity e1 is a duplicate of another entity e2, and if e1 frequently appears together with information I on the Web, then e2 may appear frequently with I on the Web. By using search engines, we analyze the frequency, URLs, or contents of the returned web pages to capture the information I of an entity. Extensive experiments verify that our hypothesis holds in many real settings, and the idea of using the Web as the additional source for the linkage problem is promising. Our proposal shows 51\% (on average) and 193\% (at best) improvement in precision/recall compared to a baseline approach.},
  booktitle = {Proceedings of the 9th Annual ACM International Workshop on Web Information and Data Management},
  pages = {121--128},
  numpages = {8},
  keywords = {entity resolution, record linkage},
  location = {Lisbon, Portugal},
  series = {WIDM '07}
}
@article{10.1016/j.ipm.2007.03.010,
  author = {Ye, Shiren and Chua, Tat-Seng and Kan, Min-Yen and Qiu, Long},
  title = {Document concept lattice for text understanding and summarization},
  year = {2007},
  issue_date = {November, 2007},
  publisher = {Pergamon Press, Inc.},
  address = {USA},
  volume = {43},
  number = {6},
  issn = {0306-4573},
  url = {https://doi.org/10.1016/j.ipm.2007.03.010},
  doi = {10.1016/j.ipm.2007.03.010},
  abstract = {We argue that the quality of a summary can be evaluated based on how many concepts in the original document(s) that can be preserved after summarization. Here, a concept refers to an abstract or concrete entity or its action often expressed by diverse terms in text. Summary generation can thus be considered as an optimization problem of selecting a set of sentences with minimal answer loss. In this paper, we propose a document concept lattice that indexes the hierarchy of local topics tied to a set of frequent concepts and the corresponding sentences containing these topics. The local topics will specify the promising sub-spaces related to the selected concepts and sentences. Based on this lattice, the summary is an optimized selection of a set of distinct and salient local topics that lead to maximal coverage of concepts with the given number of sentences. Our summarizer based on the concept lattice has demonstrated competitive performance in Document Understanding Conference 2005 and 2006 evaluations as well as follow-on tests.},
  journal = {Information Processing \& Management},
  month = nov,
  pages = {1643--1662},
  numpages = {20},
  keywords = {Text summarization, Semantic, Document concept lattice, Concept}
}
@inproceedings{10.5555/1621474.1621532,
  author = {Elmacioglu, Ergin and Tan, Yee Fan and Yan, Su and Kan, Min-Yen and Lee, Dongwon},
  title = {{PSNUS}: web people name disambiguation by simple clustering with rich features},
  year = {2007},
  publisher = {Association for Computational Linguistics},
  address = {USA},
  abstract = {We describe about the system description of the PSNUS team for the SemEval-2007 Web People Search Task. The system is based on the clustering of the web pages by using a variety of features extracted and generated from the data provided. This system achieves $F_{\alpha=0.5} = 0.75$ and $F_{\alpha=0.2} = 0.78$ for the final test data set of the task.},
  booktitle = {Proceedings of the 4th International Workshop on Semantic Evaluations},
  pages = {268--271},
  numpages = {4},
  location = {Prague, Czech Republic},
  series = {SemEval '07}
}
@inproceedings{10.1145/1255175.1255239,
  author = {Gozali, Jesse Prabawa and Kan, Min-Yen},
  title = {A rich {OPAC} user interface with {AJAX}},
  year = {2007},
  isbn = {9781595936448},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/1255175.1255239},
  doi = {10.1145/1255175.1255239},
  abstract = {Open Public Access Catalogs (OPACs) provide patrons with a user interface (UI) to help their information seeking tasks. Even though many OPAC UIs are now web-based, their architectures are often static, which does not allow them to integrate user assistance modules dynamically. We report on a UI that supports integration of such modules, while providing a usable and rich environment. We explore how Asynchronous JavaScript + XML (AJAX) can be employed to create an OPAC UI that offers a better user experience and task support. Our developed UI features a modular architecture that combines several Natural Language Processing (NLP) modules employed to enhance information seeking. Our UI manages queries in a novel way with a tabbed interface featuring an overview/details presentation model, and an AJAX query results data grid. Preliminary user testing results are also presented.},
  booktitle = {Proceedings of the 7th ACM/IEEE-CS Joint Conference on Digital Libraries},
  pages = {329--330},
  numpages = {2},
  keywords = {AJAX, GUI, OPAC, human-computer interaction},
  location = {Vancouver, BC, Canada},
  series = {JCDL '07}
}
@inproceedings{10.1145/1255175.1255192,
  author = {Kan, Min-Yen},
  title = {{SlideSeer}: a digital library of aligned document and presentation pairs},
  year = {2007},
  isbn = {9781595936448},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/1255175.1255192},
  doi = {10.1145/1255175.1255192},
  abstract = {Research findings are often transmitted both as written documents and narrated slide presentations. As these two forms of media contain both unique and replicated information, it is useful to combine and align these two views to create a single synchronized medium. We introduce SlideSeer, a digital library that discovers, aligns and presents such presentation and document pairs. We discuss the three major system components of the SlideSeer DL: 1) the resource discovery, 2) the fine-grained alignment and 3) the user interface. For resource discovery, we have bootstrapped our collection building process using metadata from DBLP and CiteSeer. For alignment, we modify maximum similarity alignment to favor monotonic alignments and incorporate a classifier to handle slides which should not be aligned. For the user interface, we allow the user to seamlessly switch between four carefully motivated views of the resulting synchronized media pairs.},
  booktitle = {Proceedings of the 7th ACM/IEEE-CS Joint Conference on Digital Libraries},
  pages = {81--90},
  numpages = {10},
  keywords = {SlideSeer, digital library, fine-grained alignment, presentations (slides), synchronized media},
  location = {Vancouver, BC, Canada},
  series = {JCDL '07}
}
@article{10.1145/1229179.1229182,
  author = {Cui, Hang and Kan, Min-Yen and Chua, Tat-Seng},
  title = {Soft pattern matching models for definitional question answering},
  year = {2007},
  issue_date = {April 2007},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  volume = {25},
  number = {2},
  issn = {1046-8188},
  url = {https://doi.org/10.1145/1229179.1229182},
  doi = {10.1145/1229179.1229182},
  abstract = {We explore probabilistic lexico-syntactic pattern matching, also known as soft pattern matching, in a definitional question answering system. Most current systems use regular expression-based hard matching patterns to identify definition sentences. Such rigid surface matching often fares poorly when faced with language variations. We propose two soft matching models to address this problem: one based on bigrams and the other on the Profile Hidden Markov Model (PHMM). Both models provide a theoretically sound method to model pattern matching as a probabilistic process that generates token sequences. We demonstrate the effectiveness of the models on definition sentence retrieval for definitional question answering. We show that both models significantly outperform the state-of-the-art manually constructed hard matching patterns on recent TREC data. A critical difference between the two models is that the PHMM has a more complex topology. We experimentally show that the PHMM can handle language variations more effectively but requires more training data to converge. While we evaluate soft pattern models only on definitional question answering, we believe that both models are generic and can be extended to other areas where lexico-syntactic pattern matching can be applied.},
  journal = {ACM Transactions on Information Systems},
  month = apr,
  pages = {8--es},
  numpages = {30},
  keywords = {definitional question answering, Soft patterns}
}
@article{10.1016/j.ipm.2006.07.019,
  author = {Lu, Wei and Kan, Min-Yen},
  title = {Supervised categorization of {JavaScript\texttrademark} using program analysis features},
  year = {2007},
  issue_date = {March 2007},
  publisher = {Pergamon Press, Inc.},
  address = {USA},
  volume = {43},
  number = {2},
  issn = {0306-4573},
  url = {https://doi.org/10.1016/j.ipm.2006.07.019},
  doi = {10.1016/j.ipm.2006.07.019},
  abstract = {Web pages often embed scripts for a variety of purposes, including advertising and dynamic interaction. Understanding embedded scripts and their purpose can often help to interpret or provide crucial information about the web page. We have developed a functionality-based categorization of JavaScript, the most widely used web page scripting language. We then view understanding embedded scripts as a text categorization problem. We show how traditional information retrieval methods can be augmented with the features distilled from the domain knowledge of JavaScript and software analysis to improve classification performance. We perform experiments on the standard WT10G web page corpus, and show that our techniques eliminate over 50\% of errors over a standard text classification baseline.},
  journal = {Information Processing \& Management},
  month = mar,
  pages = {431--444},
  numpages = {14},
  keywords = {ECMAScript, JavaScript, automated code classification, information retrieval, machine learning, program classification, program comprehension, program pattern, software metrics, source clone}
}
@book{10.5555/1196405,
  editor = {Ng, Hwee Tou and Leong, Mun-Kew and Kan, Min-Yen and Ji, Donghong},
  title = {Information Retrieval Technology: Third Asia Information Retrieval Symposium, AIRS 2006, Singapore, October 16--18, 2006, Proceedings},
  series = {Lecture Notes in Computer Science},
  year = {2006},
  isbn = {3540457801},
  publisher = {Springer-Verlag},
  address = {Berlin, Heidelberg}
}
@inproceedings{10.1007/11788034_48,
  author = {Wang, Fei and Kan, Min-Yen},
  title = {{NPIC}: hierarchical synthetic image classification using image search and generic features},
  year = {2006},
  isbn = {3540360182},
  publisher = {Springer-Verlag},
  address = {Berlin, Heidelberg},
  url = {https://doi.org/10.1007/11788034_48},
  doi = {10.1007/11788034_48},
  abstract = {We introduce NPIC, an image classification system that focuses on synthetic (e.g., non-photographic) images. We use class-specific keywords in an image search engine to create a noisily labeled training corpus of images for each class. NPIC then extracts both content-based image retrieval (CBIR) features and metadata-based textual features for each image for machine learning. We evaluate this approach on three different granularities: 1) natural vs. synthetic, 2) map vs. figure vs. icon vs. cartoon vs. artwork 3) and further subclasses of the map and figure classes. The NPIC framework achieves solid performance (99\%, 97\% and 85\% in cross validation, respectively). We find that visual features provide a significant boost in performance, and that textual and visual features vary in usefulness at the different levels of granularities of classification.},
  booktitle = {Proceedings of the 5th International Conference on Image and Video Retrieval},
  pages = {473--482},
  numpages = {10},
  location = {Tempe, AZ},
  series = {CIVR'06}
}
@comment{Removed an exact duplicate of entry 10.1007/11788034_48, which is defined immediately above; a repeated citation key is a BibTeX error.}
@inproceedings{10.1007/11788034_15,
  author = {Neo, Shi-Yong and Zhao, Jin and Kan, Min-Yen and Chua, Tat-Seng},
  title = {Video retrieval using high level features: exploiting query matching and confidence-based weighting},
  year = {2006},
  isbn = {3540360182},
  publisher = {Springer-Verlag},
  address = {Berlin, Heidelberg},
  url = {https://doi.org/10.1007/11788034_15},
  doi = {10.1007/11788034_15},
  abstract = {Recent research in video retrieval has focused on automated, high-level feature indexing on shots or frames. One important application of such indexing is to support precise video retrieval. We report on extensions of this semantic indexing on news video retrieval. First, we utilize extensive query analysis to relate various high-level features and query terms by matching the textual description and context in a time-dependent manner. Second, we introduce a framework to effectively fuse the relation weights with the detectors' confidence scores. This results in individual high level features that are weighted on a per-query basis. Tests on the TRECVID 2005 dataset show that the above two enhancements yield significant improvement in performance over a corresponding state-of-the-art video retrieval baseline.},
  booktitle = {Proceedings of the 5th International Conference on Image and Video Retrieval},
  pages = {143--152},
  numpages = {10},
  location = {Tempe, AZ},
  series = {CIVR'06}
}
@inproceedings{10.1145/1076034.1076101,
  author = {Cui, Hang and Kan, Min-Yen and Chua, Tat-Seng},
  title = {Generic soft pattern models for definitional question answering},
  year = {2005},
  isbn = {1595930345},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/1076034.1076101},
  doi = {10.1145/1076034.1076101},
  abstract = {This paper explores probabilistic lexico-syntactic pattern matching, also known as soft pattern matching. While previous methods in soft pattern matching are ad hoc in computing the degree of match, we propose two formal matching models: one based on bigrams and the other on the Profile Hidden Markov Model (PHMM). Both models provide a theoretically sound method to model pattern matching as a probabilistic process that generates token sequences. We demonstrate the effectiveness of these models on definition sentence retrieval for definitional question answering. We show that both models significantly outperform state-of-the-art manually constructed patterns. A critical difference between the two models is that the PHMM technique handles language variations more effectively but requires more training data to converge. We believe that both models can be extended to other areas where lexico-syntactic pattern matching can be applied.},
  booktitle = {Proceedings of the 28th Annual International ACM SIGIR Conference on Research and Development in Information Retrieval},
  pages = {384--391},
  numpages = {8},
  keywords = {definitional question answering, probabilistic models, soft pattern},
  location = {Salvador, Brazil},
  series = {SIGIR '05}
}
@inproceedings{10.1145/1065385.1065406,
  author = {Kan, Min-Yen and Poo, Danny C. C.},
  title = {Detecting and supporting known item queries in online public access catalogs},
  year = {2005},
  isbn = {1581138768},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/1065385.1065406},
  doi = {10.1145/1065385.1065406},
  abstract = {When users seek to find specific resources in a digital library, they often use the library catalog to locate them. These catalog queries are defined as known item queries. As known item queries search for specific resources, it is important to manage them differently from other search types, such as area searches. We study how to identify known item queries in the context of a large academic institution's online public access catalog (OPAC), in which queries are issued via a simple keyword interface. We also examine how to recognize when a known item query has retrieved the item in question. Our approach combines techniques in machine learning, language modeling and machine translation evaluation metrics to build a classifier capable of distinguishing known item queries and correctly classifies titles for whether they are the known item sought with an 80\% and 95\% correlation to human performance, respectively on each task. To our knowledge, this is the first report of such work, which has the potential to streamline the user interface of both OPACs and digital libraries in support of known item searches.},
  booktitle = {Proceedings of the 5th ACM/IEEE-CS Joint Conference on Digital Libraries},
  pages = {91--99},
  numpages = {9},
  keywords = {known item queries, query language model, query types},
  location = {Denver, CO, USA},
  series = {JCDL '05}
}
@article{10.1016/j.artmed.2004.07.018,
  author = {Elhadad, Noemie and Kan, Min-Yen and Klavans, Judith L. and McKeown, Kathleen R.},
  title = {Customization in a unified framework for summarizing medical literature},
  year = {2005},
  issue_date = {February, 2005},
  publisher = {Elsevier Science Publishers Ltd.},
  address = {GBR},
  volume = {33},
  number = {2},
  issn = {0933-3657},
  url = {https://doi.org/10.1016/j.artmed.2004.07.018},
  doi = {10.1016/j.artmed.2004.07.018},
  abstract = {Objective: We present the summarization system in the PErsonalized Retrieval and Summarization of Images, Video and Language (PERSIVAL) medical digital library. Although we discuss the context of our summarization research within the PERSIVAL platform, the primary focus of this article is on strategies to define and generate customized summaries. Methods and material: Our summarizer employs a unified user model to create a tailored summary of relevant documents for either a physician or lay person. The approach takes advantage of regularities in medical literature text structure and content to fulfill identified user needs. Results: The resulting summaries combine both machine-generated text and extracted text that comes from multiple input documents. Customization includes both group-based modeling for two classes of users, physician and lay person, and individually driven models based on a patient record. Conclusions: Our research shows that customization is feasible in a medical digital library.},
  journal = {Artificial Intelligence in Medicine},
  month = feb,
  pages = {179--198},
  numpages = {20},
  keywords = {Clinical information system, Medical digital library, Multi-document information extraction, Multi-document summarization, User modeling}
}
@inproceedings{10.1145/1031453.1031478,
  author = {Lee, Chee How and Kan, Min-Yen and Lai, Sandra},
  title = {Stylistic and lexical co-training for web block classification},
  year = {2004},
  isbn = {1581139780},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/1031453.1031478},
  doi = {10.1145/1031453.1031478},
  abstract = {Many applications which use web data extract information from a limited number of regions on a web page. As such, web page division into blocks and the subsequent block classification have become a preprocessing step. We introduce PARCELS, an open-source, co-trained approach that performs classification based on separate stylistic and lexical views of the web page. Unlike previous work, PARCELS performs classification on fine-grained blocks. In addition to table-based layout, the system handles real-world pages which feature layout based on divisions and spans as well as stylistic inference for pages using cascaded style sheets. Our evaluation shows that the co-training process results in a reduction of 28.5\% in error rate over a single-view classifier and that our approach is comparable to other state-of-the-art systems.},
  booktitle = {Proceedings of the 6th Annual ACM International Workshop on Web Information and Data Management},
  pages = {136--143},
  numpages = {8},
  keywords = {web page division, web page block classification, lexical and stylistic learners, co-training, PARCELS},
  location = {Washington DC, USA},
  series = {WIDM '04}
}
@article{10.1002/asi.10364,
  author = {Kan, Min-Yen},
  title = {Review of ``Introduction to digital libraries by {G. G. Chowdhury} and {Sudatta Chowdhury}'' {London}: {Facet} 2003},
  year = {2004},
  issue_date = {January 15, 2004},
  publisher = {John Wiley \& Sons, Inc.},
  address = {USA},
  volume = {55},
  number = {2},
  issn = {1532-2882},
  url = {https://doi.org/10.1002/asi.10364},
  doi = {10.1002/asi.10364},
  journal = {Journal of the American Society for Information Science and Technology},
  month = jan,
  pages = {178--179},
  numpages = {2}
}
@phdthesis{10.5555/936671,
  author = {Kan, Min-Yen},
  advisor = {McKeown, Kathleen R. and Klavans, Judith L.},
  title = {Automatic text summarization as applied to information retrieval: using indicative and informative summaries},
  year = {2003},
  isbn = {0493910808},
  school = {Columbia University},
  publisher = {Columbia University},
  address = {USA},
  note = {AAI3071379}
}
@inproceedings{10.1145/544220.544227,
  author = {Kan, Min-Yen and Klavans, Judith L.},
  title = {Using librarian techniques in automatic text summarization for information retrieval},
  year = {2002},
  isbn = {1581135130},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/544220.544227},
  doi = {10.1145/544220.544227},
  abstract = {A current application of automatic text summarization is to provide an overview of relevant documents coming from an information retrieval (IR) system. This paper examines how Centrifuser, one such summarization system, was designed with respect to methods used in the library community. We have reviewed these librarian expert techniques to assist information seekers and codified them into eight distinct strategies. We detail how we have operationalized six of these strategies in Centrifuser by computing an informative extract, indicative differences between documents, as well as navigational links to narrow or broaden a user's query. We conclude the paper with results from a preliminary evaluation.},
  booktitle = {Proceedings of the 2nd ACM/IEEE-CS Joint Conference on Digital Libraries},
  pages = {36--45},
  numpages = {10},
  keywords = {automatic text summarization, information retrieval user interfaces, reference librarian techniques},
  location = {Portland, Oregon, USA},
  series = {JCDL '02}
}
@inproceedings{10.1145/379437.379784,
  author = {Elhadad, Noemie and Kan, Min-Yen and Lok, Simon and Muresan, Smaranda},
  title = {{PERSIVAL}: personalized summarization over multimedia health-care information},
  year = {2001},
  isbn = {1581133456},
  publisher = {Association for Computing Machinery},
  address = {New York, NY, USA},
  url = {https://doi.org/10.1145/379437.379784},
  doi = {10.1145/379437.379784},
  abstract = {In this demonstration, we present several integrated components of PERSIVAL (PErsonalized Retrieval and Summarization of Image, Video And Language)[1], a system designed to provide personalized access to a distributed digital library of medical literature and consumer health information. The global system architecture of PERSIVAL is best described as a two-stage processing pipeline. The first stage is a retrieval system that matches user queries with relevant multimedia data in the library. The second stage is a visualization system that processes the multimedia data matched by the first stage for display. Our demonstration focuses on the second stage of PERSIVAL's processing pipeline. Given a set of relevant documents for certain predefined queries, our integrated demonstration seeks to give a tailored response for either physicians or patients, featuring textual summaries, as well as relevant medical definitions. To visualize the summaries and definitions, we employ automated constraint-based layout of the user interface that allows for rich interaction between summaries and definitions. PERSIVAL's natural language processing and user interface modules make up the visualization portion of the system and illustrate state-of-the-art digital library technology. Following are the modules presented in our demonstration.},
  booktitle = {Proceedings of the 1st ACM/IEEE-CS Joint Conference on Digital Libraries},
  pages = {455},
  location = {Roanoke, Virginia, USA},
  series = {JCDL '01}
}