D. Sonntag, "Computational Sustainability and Technology (CST)" Deutsches Forschungszentrum für Künstliche Intelligenz GmbH2025.
@techreport{pub15565, series = {Technical Note},
abstract = {Computational sustainability (CS) is the scientific field that aims to balance societal, economic, and environmental resources using methods from computer science and artificial intelligence (AI). AI models, e.g., machine learning models, enrich models of computational sustainability. Research in interactive machine learning can make important contributions to help address key challenges of sustainability (AI for CS). Computational sustainability questions enrich AI research, not only by providing problems that involve uncertainty or vagueness, thus generating compelling new AI challenges, but also by providing a requirement framework for resource-bounded computation (CS for AI). The research department Interactive Machine Learning of the German Research Center for Artificial Intelligence hosts “Computational Sustainability & Technology”; we use applied artificial intelligence methods in the areas of machine learning, knowledge representation, and intelligent user interfaces to help achieve more sustainable systems (AI for CS) or to build more sustainable AI systems (CS for AI). Using the power of, for example, deep learning clusters, we can process large quantities of information and allocate resources based on real-time information. On the other hand, we have to decide when to regulate the power consumption of such AI systems. Applications are widespread. For example, smart grids implement renewable resources and storage capabilities to control the production and expenditure of energy. In the project Seadash, we work on integrating machine learning methods for event detection and classification of underwater signals to preserve marine fauna. Further, together with edge computing (the new distributed computing paradigm that brings computation and data storage closer to the location where it is needed), we not only improve response times and save bandwidth but also reduce energy consumption (Mobile AI Lab). The theory of computational sustainability includes aspects from game theory, machine learning theory, and human-computer interaction theory. For example, climate change, pollution, and other environmental crises can be explained by theories of human psychology (e.g., the individual in a social world) and can hence be computed by (machine learning) models with computational models of the Prisoner’s Dilemma. More is more? More computation is not always better, as unsustainable consumption of energy should be avoided. There are already interesting approaches in the machine learning community, e.g., towards the systematic reporting of the energy and carbon footprints of machine learning, or looking at methodological issues related to training on big data and large web corpora where billions or even trillions of parameters are tuned. Humans, on the contrary, can do such “training” with only a few examples or from simple instructions (cf. interactive machine learning, https://www.dfki.de/iml/). AI for CS and CS for AI and the application domains bring us back to the main challenges of artificial intelligence research and applied research in the area of CS technology: (1) incompleteness, (2) vagueness, (3) uncertainty and reasoning (in deep learning), and (4) resource-bounded computation and learning. 
In our projects, we tackle these theoretical challenges and focus on imitation learning, learning with small datasets, transfer learning, long-term autonomy of sustainable AI systems, never-ending learning, hybrid teams, IoT, multi-sensor streams for small interaction devices, mobile computing platforms (Mobile AI Lab), and the efficient use of big deep learning clusters.},
year = {2025},
title = {Computational Sustainability and Technology (CST)},
institution = {Deutsches Forschungszentrum für Künstliche Intelligenz GmbH},
author = {Daniel Sonntag},
url = {https://www.dfki.de/fileadmin/user_upload/import/15565_Technical_Note__CST_.pdf}
}
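The note's point about systematic reporting of the energy and carbon footprints of machine learning reduces to simple arithmetic. A minimal Python sketch; all figures (GPU count, power draw, grid carbon intensity) are illustrative placeholders, not measured values:

```python
# Back-of-the-envelope energy and carbon estimate for a training run;
# every number below is an illustrative placeholder, not a measurement.

def training_footprint(gpu_count, avg_power_w, hours, grid_kg_co2_per_kwh):
    """Return (energy in kWh, emissions in kg CO2e)."""
    energy_kwh = gpu_count * avg_power_w * hours / 1000.0
    return energy_kwh, energy_kwh * grid_kg_co2_per_kwh

# e.g., 8 GPUs drawing ~300 W for 48 hours on a ~0.4 kg CO2e/kWh grid
energy, co2 = training_footprint(8, 300.0, 48.0, 0.4)
print(f"{energy:.1f} kWh, {co2:.1f} kg CO2e")
```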
H. M. D. Nguyen, A. T. Le, T. Q. Nguyen, N. T. Diep, T. Nguyen, D. Duong-Tran, J. Peters, L. Shen, M. Niepert, and D. Sonntag, "Dude: Dual Distribution-Aware Context Prompt Learning For Large Vision-Language Model" in Proc. The 16th Asian Conference on Machine Learning. Asian Conference on Machine Learning (ACML-2024), December 5-8, 2025.
@inproceedings{pub15620, abstract = {Prompt learning methods are gaining increasing attention due to their ability to customize large vision-language models to new domains using pre-trained contextual knowledge and minimal training data. However, existing works typically rely on optimizing unified prompt inputs, often struggling with fine-grained classification tasks due to insufficient discriminative attributes. To tackle this, we consider a new framework based on a dual context of both domain-shared and class-specific contexts, where the latter is generated by Large Language Models (LLMs) such as GPTs. Such dual prompt methods enhance the model’s feature representation by joining implicit and explicit factors encoded in LLM knowledge. Moreover, we formulate the Unbalanced Optimal Transport (UOT) theory to quantify the relationships between constructed prompts and visual tokens. Through partial matching, UOT can properly align discrete sets of visual tokens and prompt embeddings under different mass distributions, which is particularly valuable for handling irrelevant or noisy elements, ensuring that the preservation of mass does not restrict transport solutions. Furthermore, UOT’s characteristics integrate seamlessly with image augmentation, expanding the training sample pool while maintaining a reasonable distance between perturbed images and prompt inputs. Extensive experiments across few-shot classification and adapter settings substantiate the superiority of our model over current state-of-the-art baselines.},
year = {2025},
title = {Dude: Dual Distribution-Aware Context Prompt Learning For Large Vision-Language Model},
booktitle = {The 16th Asian Conference on Machine Learning. Asian Conference on Machine Learning (ACML-2024), December 5-8},
publisher = {Proceedings of Machine Learning Research},
author = {Ho Minh Duy Nguyen and An T. Le and Trung Q. Nguyen and Nghiem T. Diep and Tai Nguyen and Duy Duong-Tran and Jan Peters and Li Shen and Mathias Niepert and Daniel Sonntag},
url = {https://www.dfki.de/fileadmin/user_upload/import/15620_199_Dude_Dual_Distribution_Awa.pdf}
}
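The Unbalanced Optimal Transport step described in the abstract can be illustrated with a generic entropic UOT solver in Sinkhorn scaling form (KL-relaxed marginals). This is a sketch of the general technique, not the paper's implementation; the cost matrix, mass vectors, and hyperparameters are assumptions:

```python
import numpy as np

def unbalanced_sinkhorn(C, a, b, eps=0.05, rho=1.0, n_iter=200):
    """Entropic unbalanced OT with KL-relaxed marginals, scaling form.

    C: (n, m) cost matrix, e.g. 1 - cosine similarity between visual
    tokens and prompt embeddings; a, b: mass vectors. Returns the plan.
    """
    K = np.exp(-C / eps)              # Gibbs kernel
    u, v = np.ones_like(a), np.ones_like(b)
    fi = rho / (rho + eps)            # damping induced by the KL penalty
    for _ in range(n_iter):
        u = (a / (K @ v)) ** fi
        v = (b / (K.T @ u)) ** fi
    return u[:, None] * K * v[None, :]

# toy usage: partially align 5 visual tokens with 3 prompt embeddings
rng = np.random.default_rng(0)
X = rng.normal(size=(5, 8)); P = rng.normal(size=(3, 8))
X /= np.linalg.norm(X, axis=1, keepdims=True)
P /= np.linalg.norm(P, axis=1, keepdims=True)
plan = unbalanced_sinkhorn(1 - X @ P.T, np.ones(5) / 5, np.ones(3) / 3)
```

Because the marginals are only softly enforced, mass on irrelevant or noisy tokens can be dropped rather than forcibly transported, which is the property the abstract highlights.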
R. Leist, H. Profitlich, T. Hunsicker, and D. Sonntag, "Towards Trustable Clinical Decision Support Systems: A User Study with Ophthalmologists" in Proc. IUI '25: Proceedings of the 30th International Conference on Intelligent User Interfaces. International Conference on Intelligent User Interfaces (IUI-2025), March 24-27, Cagliari, Italy, 2025.
@inproceedings{pub15769, abstract = {Integrating Artificial Intelligence (AI) into Clinical Decision Support Systems (CDSS) presents significant opportunities for improving healthcare delivery, particularly in fields like ophthalmology. This paper explores the usability and trustworthiness of an AI-driven CDSS designed to assist ophthalmologists in treating diabetic retinopathy and age-related macular degeneration. To this end, we created a CDSS and evaluated its impact on efficiency, informedness, and user experience through task-based semi-structured interviews and questionnaires with 11 ophthalmologists. The usability of the CDSS was rated highly, with a SUS score of 81.75. Results show that participants felt that the CDSS would improve their efficiency and informedness, with one major aspect being the integration of Electronic Health Records (EHR) and Optical Coherence Tomography (OCT) data into a single interface. Additionally, we explored aspects of the trustworthiness of AI components, specifically OCT segmentation, treatment recommendation, and visual acuity forecasting. Through thematic analysis, we identified key factors influencing trustworthiness and clinical adoption. Results show that a larger degree of abstraction from input to output of a model correlates with decreased trust. From our findings, we propose three guidelines for designing trustworthy CDSS.},
month = {3},
year = {2025},
title = {Towards Trustable Clinical Decision Support Systems: A User Study with Ophthalmologists},
booktitle = {IUI '25: Proceedings of the 30th International Conference on Intelligent User Interfaces. International Conference on Intelligent User Interfaces (IUI-2025), March 24-27, Cagliari, Italy},
isbn = {979-8-4007-1306-4},
publisher = {Association for Computing Machinery, New York, NY, United States},
author = {Robert Leist and Hans-Jürgen Profitlich and Tim Hunsicker and Daniel Sonntag},
keywords = {Clinical Decision Support Systems (CDSS), Interactive Machine Learning (IML), Human-AI collaboration, AI-assisted decision making, user trust, domain experts, ophthalmology},
url = {https://dl.acm.org/doi/10.1145/3708359.3712136 https://www.dfki.de/fileadmin/user_upload/import/15769_3708359.3712136.pdf}
}
S.-J. Bittner, M. Barz, H.-J. Profitlich, M. P. Nieminen, and D. Sonntag, Eds., "FunduScope: A Human-centered Tool for ML-assisted e-Learning in Ophthalmology" International Conference on Intelligent User Interfaces (IUI-2025), located at IUI-2025, March 24-27, Cagliari, Italy, ACM, 2025.
doi: 10.1145/3708557.3716356
@proceedings{pub15775, abstract = {Interpreting fundus images is an essential skill for diagnosing eye diseases, such as diabetic retinopathy (DR), one of the leading causes of visual impairment. However, the training of junior doctors relies on experienced ophthalmologists, who often lack time for teaching, or on printed training material that lacks variability in examples. Additionally, machine learning (ML) models successfully detect pathologies relevant to DR and grade the corresponding severity level of cases. With that, our work combines advances in ML with the need of junior doctors to learn independently. We present an interactive learning tool for ophthalmology, which lets junior doctors mark pathologies in fundus images and check them upon the solution of an applied ML algorithm. By aligning the learning concept with theories of cognitive load, usability, and e-learning factors, this system serves as a testbed to explore the potential of ML-supported learning tools for medical education, advancing interactive e-learning.},
year = {2025},
title = {FunduScope: A Human-centered Tool for ML-assisted e-Learning in Ophthalmology},
editor = {Sara-Jane Bittner and Michael Barz and Hans-Jürgen Profitlich and Mika P. Nieminen and Daniel Sonntag},
isbn = {979-8-4007-1409-2},
publisher = {ACM},
doi = {https://doi.org/10.1145/3708557.3716356},
keywords = {human-centered design, cognitive load, usability, e-learning, machine learning, learning tool},
organization = {ACM}
}
S.-J. Bittner, R. Leist, L. Kopácsi, O. S. Bhatti, A. M. Selim, M. Barz, and D. Sonntag, Eds., "Interactive Multimodal Photobook Co-Creation in Virtual Reality" International Conference on Intelligent User Interfaces (IUI-2025), located at IUI-2025, March 24-27, Cagliari, Italy, ACM, 2025.
doi: 10.1145/3708557.3716355
@proceedings{pub15776, abstract = {The integration of Multimodal-Multisensor Interface (MMI) technologies into Virtual Reality (VR) enables users to engage with computational systems in a natural and immersive way. However, these technologies remain underexplored when applied to deep learning (DL) systems in VR. This paper introduces a VR-based system designed to evaluate how users interact with DL models in virtual environments using MMI technologies, demonstrated through a photobook co-creation use case. The system facilitates human-AI collaboration (co-creation) by allowing users to work with DL models to create photobooks and supports incremental model learning based on user behaviour (Interactive DL) to produce personalised outputs. The tool features a Unity VR frontend that incorporates speech, gaze, and controller inputs. It has a modular backend architecture that allows seamless integration and testing of different DL models. This tool serves as a testbed for exploring MMI in immersive VR environments for both IDL and co-creation.},
year = {2025},
title = {Interactive Multimodal Photobook Co-Creation in Virtual Reality},
editor = {Sara-Jane Bittner and Robert Leist and László Kopácsi and Omair Shahzad Bhatti and Abdulrahman Mohamed Selim and Michael Barz and Daniel Sonntag},
isbn = {979-8-4007-1409-2},
publisher = {ACM},
doi = {https://doi.org/10.1145/3708557.3716355},
keywords = {Virtual Reality, Interaction Design, Co-Creation, Interactive Deep Learning, Photobook Creation},
organization = {ACM}
}
S. Liang and D. Sonntag, "Explainable Biomedical Claim Verification with Large Language Models" in Proc. Joint Proceedings of the ACM IUI Workshops 2025. International Conference on Intelligent User Interfaces (IUI-2025), ACM IUI Workshops 2025, located at IUI-2025, March 24-27, Cagliari, Italy, 2025.
@inproceedings{pub15777, abstract = {Verification of biomedical claims is critical for healthcare decision-making, public health policy and scientific research. We present an interactive biomedical claim verification system by integrating LLMs, transparent model explanations, and user-guided justification. In the system, users first retrieve relevant scientific studies from a persistent medical literature corpus and explore how different LLMs perform natural language inference (NLI) within a task-adaptive reasoning framework to classify each study as "Support," "Contradict," or "Not Enough Information" regarding the claim. Users can examine the model's reasoning process with additional insights provided by SHAP values that highlight word-level contributions to the final result. This combination enables a more transparent and interpretable evaluation of the model's decision-making process. A summary stage allows users to consolidate the results by selecting a result with narrative justification generated by LLMs. As a result, a consensus-based final decision is summarized for each retrieved study, aiming at safe and accountable AI-assisted decision-making in biomedical contexts. We aim to integrate this explainable verification system as a component within a broader evidence synthesis framework to support human-AI collaboration.},
month = {3},
year = {2025},
title = {Explainable Biomedical Claim Verification with Large Language Models},
booktitle = {Joint Proceedings of the ACM IUI Workshops 2025. International Conference on Intelligent User Interfaces (IUI-2025), ACM IUI Workshops 2025, located at IUI-2025, March 24-27, Cagliari, Italy},
publisher = {Joint Proceedings of the ACM IUI Workshops 2025},
author = {Siting Liang and Daniel Sonntag},
keywords = {Biomedical Claim Verification, Large Language Models, Natural Language Inference, Explainable AI},
url = {https://axai.trx.li/papers/5.pdf https://www.dfki.de/fileadmin/user_upload/import/15777_explainable_biomedical_claim_verification_with_LLMs.pdf}
}
R. Saghir, I. B. Campos, T. Gouvea, and D. Sonntag, "EcoScape Analyzer: A Tool for Performing Soundscape Analysis With Flexible Pipeline for Biodiversity Assessment" in Proc. IUI '25: Proceedings of the 30th International Conference on Intelligent User Interfaces. International Conference on Intelligent User Interfaces (IUI-2025), March 24-27, Cagliari, Italy, 2025.
doi: 10.1145/3708557.3716359
@inproceedings{pub15810, abstract = {EcoScape Analyzer addresses the need for flexible soundscape analysis to evaluate biodiversity and ecosystem health. Designed for Passive Acoustic Monitoring (PAM) data, it integrates diverse feature extraction methods (acoustic indices, self-supervised, and transfer learning embeddings), dimensionality reduction techniques, and clustering approaches. The tool offers adaptable pipelines, performance feedback, and visualizations, enabling ecologists to explore soundscape patterns and dynamics efficiently, without the need for custom implementations.},
month = {3},
year = {2025},
title = {EcoScape Analyzer: A Tool for Performing Soundscape Analysis With Flexible Pipeline for Biodiversity Assessment},
booktitle = {IUI '25: Proceedings of the 30th International Conference on Intelligent User Interfaces. International Conference on Intelligent User Interfaces (IUI-2025), March 24-27, Cagliari, Italy},
pages = {137-140},
isbn = {9781450375139},
publisher = {Association for Computing Machinery, New York, NY, United States},
doi = {https://doi.org/10.1145/3708557.3716359},
author = {Rida Saghir and Ivan Braga Campos and Thiago Gouvea and Daniel Sonntag},
url = {https://dl.acm.org/doi/10.1145/3708557.3716359}
}
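The adaptable pipeline this abstract describes (feature extraction, dimensionality reduction, clustering) can be sketched end to end with scikit-learn. The acoustic index below is a deliberately simple stand-in for the richer indices and learned embeddings EcoScape Analyzer supports, and the random clips are hypothetical inputs:

```python
import numpy as np
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

def spectral_entropy(clip):
    """A simple acoustic index: entropy of the power spectrum."""
    spec = np.abs(np.fft.rfft(clip)) ** 2
    p = spec / spec.sum()
    return float(-(p * np.log(p + 1e-12)).sum())

# clips: 1-D audio arrays from PAM recordings (random stand-ins here)
rng = np.random.default_rng(0)
clips = [rng.normal(size=16000) for _ in range(100)]
feats = np.array([[spectral_entropy(c), c.std(), np.abs(c).mean()] for c in clips])

X = StandardScaler().fit_transform(feats)                  # normalise features
X2 = PCA(n_components=2).fit_transform(X)                  # dimensionality reduction
labels = KMeans(n_clusters=4, n_init=10).fit_predict(X2)   # soundscape clusters
```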
R. Saghir, "Flexible and Interpretable Soundscape Analysis for Biodiversity Assessment and Ecosystem Health for Domain Experts" in Proc. IUI '25: Proceedings of the 30th International Conference on Intelligent User Interfaces. International Conference on Intelligent User Interfaces (IUI-2025), March 24-27, Cagliari, Italy, 2025.
doi: 10.1145/3708557.3716144
@inproceedings{pub15811, abstract = {Biodiversity loss is a major threat to global sustainability and achieving conservation goals requires informed governance, which depends on robust biodiversity monitoring. Passive Acoustic Monitoring (PAM) enables scalable, continuous data collection, but the vast amount of unlabelled audio data necessitates efficient analysis techniques. While traditional methods focus on species identification, soundscape analysis provides a broader view of ecosystem health by capturing acoustic diversity, temporal patterns, and human impact. To address these challenges, researchers have explored various feature extraction methods, including acoustic indices, predefined acoustical features (PAFs), and AI-based techniques like self-supervised and transfer learning. However, their effectiveness varies with the task at hand, requiring careful selection, comparison and technical domain knowledge. This research focuses on the development of a tool for soundscape analysis that allows users to flexibly switch between methods and compare outputs in ecologically meaningful ways. By integrating computational techniques with domain-relevant information, this research aims to improve biodiversity monitoring and ecosystem assessment.},
month = {3},
year = {2025},
title = {Flexible and Interpretable Soundscape Analysis for Biodiversity Assessment and Ecosystem Health for Domain Experts},
booktitle = {IUI '25: Proceedings of the 30th International Conference on Intelligent User Interfaces. International Conference on Intelligent User Interfaces (IUI-2025), March 24-27, Cagliari, Italy},
pages = {218-221},
isbn = {9781450375139},
publisher = {Association for Computing Machinery, New York, NY, United States},
doi = {https://doi.org/10.1145/3708557.3716144},
author = {Rida Saghir},
url = {https://dl.acm.org/doi/full/10.1145/3708557.3716144}
}
M. Aslan, M. Bosse, D. C. H. Ehlers, M. Hinz, P. Olschewski, J. Podszun, E. Scharlach, L. Selzer, Y. Wu, A. Anagnostopoulou, and D. Sonntag, "TextVision: A more efficient way to work with research" in Proc. Joint Proceedings of the ACM IUI Workshops 2025. International Conference on Intelligent User Interfaces (IUI-2025), March 24-28, Cagliari, Italy, 2025.
@inproceedings{pub15814, abstract = {Large language models remain constrained by the limitations of current user interfaces and interaction paradigms, which hinder their ability to process complex, multimodal information beyond simple text input and output. Our proposed interface, TextVision, aims to address this limitation by enhancing how researchers interact with AI, providing a wide range of functionalities for analyzing, editing, creating new documents, and facilitating collaboration. TextVision advances state-of-the-art human-AI interaction through improved usability and novel interaction techniques, enhancing scientific research and development workflows. As a result, the user can access integrated tools, including a text editor, a PDF viewer, and an AI assistant in a chatbot format. The AI assistant can provide answers based on user input and is context-aware. This output can be enhanced using the built-in prompt designing tool to create efficient, AI-optimized prompts. Users can also select between the latest proprietary LLMs and fine-tuned open-source models tailored for specific tasks.},
year = {2025},
title = {TextVision: A more efficient way to work with research},
booktitle = {Joint Proceedings of the ACM IUI Workshops 2025. International Conference on Intelligent User Interfaces (IUI-2025), March 24-28, Cagliari, Italy},
publisher = {CEUR Proceedings},
author = {Melis Aslan and Maximilian Bosse and Daniel Christian Helmuth Ehlers and Marlon Hinz and Philipp Olschewski and Jannik Podszun and Elias Scharlach and Leon Selzer and Yukun Wu and Aliki Anagnostopoulou and Daniel Sonntag},
url = {https://www.dfki.de/fileadmin/user_upload/import/15814_TextVision_MIND_submission.pdf}
}
A. Anagnostopoulou, H. M. T. Alam, and D. Sonntag, Self-improving Scene Understanding with Vision-Language Knowledge Integration [Extended Abstract], 2025.
@misc{pub15815, abstract = {We propose an approach for personalised and contextualised image captioning. As pre-trained vision-language systems fail to capture details about the user’s intent, occasion, and other information related to the image, we envision a system that addresses these limitations. This approach has two key components for which we need to find suitable practical implementations: multimodal RAG and automatic prompt engineering. We outline our idea and review different possibilities to address these tasks.},
month = {3},
year = {2025},
title = {Self-improving Scene Understanding with Vision-Language Knowledge Integration [Extended Abstract]},
howpublished = {MIND workshop at the IUI'25 Conference},
author = {Aliki Anagnostopoulou and Hasan Md Tusfiqur Alam and Daniel Sonntag},
status_notes = {not published - presented as a poster},
url = {https://www.dfki.de/fileadmin/user_upload/import/15815_ssuvlaki_extended_abstract.pdf}
}
R. Leist, H. Profitlich, and D. Sonntag, "An AI-driven Clinical Decision Support System for the Treatment of Diabetic Retinopathy and Age-related Macular Degeneration" in Proc. Joint Proceedings of the ACM IUI Workshops 2025. Workshop on Intelligent and Interactive Health User Interfaces (HealthIUI-2025), located at IUI-2025, March 24, Cagliari, Italy, 2025.
@inproceedings{pub15818, abstract = {Diabetic Retinopathy (DR) and Age-related Macular Degeneration (AMD) are among the leading causes of blindness worldwide. Despite the availability of treatments to prevent disease progression, the effectiveness of these interventions is often limited by inefficiencies in existing clinical software. Recent advancements in Artificial Intelligence (AI) offer the potential to enhance Clinical Decision Support Systems (CDSS), streamlining workflows and reducing the burden on healthcare providers. This paper introduces a CDSS designed to assist ophthalmologists in the management of DR and AMD, integrating three AI-driven components. First, we developed a segmentation model for automated analysis of medical imaging data. Second, we implemented a recommendation algorithm to guide treatment decisions. Finally, we utilized a time series forecasting model to enable predictive medicine. Our models were trained using real-world clinical data from 913 patients with AMD and 461 patients with DR. The system demonstrates promising performance, underscoring the importance of high-performing AI models in advancing CDSS for ophthalmology. The code for our CDSS is available here: https://github.com/DFKI-Interactive-Machine-Learning/ophthalmo-cdss.},
month = {3},
year = {2025},
title = {An AI-driven Clinical Decision Support System for the Treatment of Diabetic Retinopathy and Age-related Macular Degeneration},
booktitle = {Joint Proceedings of the ACM IUI Workshops 2025. Workshop on Intelligent and Interactive Health User Interfaces (HealthIUI-2025), located at IUI-2025, March 24, Cagliari, Italy},
publisher = {Association for Computing Machinery, New York, NY, United States},
author = {Robert Leist and Hans-Jürgen Profitlich and Daniel Sonntag},
keywords = {Health informatics, Interactive systems and tools, Visualization, Clinical Decision Support Systems (CDSS), Interactive Machine Learning (IML), Human-AI collaboration, AI-assisted decision making, ophthalmology},
organization = {Association for Computing Machinery},
url = {https://www.dfki.de/fileadmin/user_upload/import/15818_submission.pdf}
}
H. M. T. Alam, D. Srivastav, M. A. Kadir, and D. Sonntag, "Towards Interpretable Radiology Report Generation via Concept Bottlenecks Using a Multi-agentic RAG" in Proc. Advances in Information Retrieval - 47th European Conference on Information Retrieval, ECIR 2025, Lucca, Italy, April 6-10, 2025, Proceedings, Part III. European Conference on Information Retrieval (ECIR-2025), 47th European Conference on Information Retrieval, located at ECIR 2025, April 6-10, Lucca, Italy, 2025.
doi: 10.1007/978-3-031-88714-7_18
@inproceedings{pub15823, series = {Lecture Notes in Computer Science},
abstract = {Deep learning has advanced medical image classification, but interpretability challenges hinder its clinical adoption. This study enhances interpretability in Chest X-ray (CXR) classification by using concept bottleneck models (CBMs) and a multi-agent Retrieval-Augmented Generation (RAG) system for report generation. By modeling relationships between visual features and clinical concepts, we create interpretable concept vectors that guide a multi-agent RAG system to generate radiology reports, enhancing clinical relevance, explainability, and transparency. Evaluation of the generated reports using an LLM-as-a-judge confirmed the interpretability and clinical utility of our model’s outputs. On the COVID-QU dataset, our model achieved 81% classification accuracy and demonstrated robust report generation performance, with five key metrics ranging between 84% and 90%. This interpretable multi-agent framework bridges the gap between high-performance AI and the explainability required for reliable AI-driven CXR analysis in clinical settings. Our code will be released at https://github.com/tifat58/IRR-with-CBM-RAG},
month = {4},
year = {2025},
title = {Towards Interpretable Radiology Report Generation via Concept Bottlenecks Using a Multi-agentic RAG},
booktitle = {Advances in Information Retrieval - 47th European Conference on Information Retrieval, ECIR 2025, Lucca, Italy, April 6-10, 2025, Proceedings, Part III. European Conference on Information Retrieval (ECIR-2025), 47th European Conference on Information Retrieval, located at ECIR 2025, April 6-10, Lucca, Italy},
editor = {Claudia Hauff and Craig Macdonald and Dietmar Jannach and Gabriella Kazai and Franco Maria Nardini and Fabio Pinelli and Fabrizio Silvestri and Nicola Tonellotto},
volume = {15574},
pages = {201-209},
publisher = {Springer},
doi = {https://doi.org/10.1007/978-3-031-88714-7_18},
author = {Hasan Md Tusfiqur Alam and Devansh Srivastav and Md Abdul Kadir and Daniel Sonntag},
keywords = {Interpretable Radiology Report Generation, Concept Bottleneck Models , Multi-Agent RAG, Explainable AI, LLMs, VLMs},
url = {https://doi.org/10.1007/978-3-031-88714-7_18 https://www.dfki.de/fileadmin/user_upload/import/15823_Towards_interpretable_cbm.pdf}
}
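The concept bottleneck, the core interpretability device in this paper, fits in a few lines of PyTorch: image features are first projected onto human-readable concept scores, and the final label is predicted only from those scores. Dimensions, concept count, and losses below are illustrative assumptions, not the paper's architecture:

```python
import torch
import torch.nn as nn

class ConceptBottleneck(nn.Module):
    """Minimal CBM: image features -> clinical concept scores -> label."""
    def __init__(self, feat_dim=512, n_concepts=8, n_classes=3):
        super().__init__()
        self.to_concepts = nn.Linear(feat_dim, n_concepts)  # the bottleneck
        self.to_label = nn.Linear(n_concepts, n_classes)    # interpretable head

    def forward(self, feats):
        concepts = torch.sigmoid(self.to_concepts(feats))   # human-readable scores
        return concepts, self.to_label(concepts)

model = ConceptBottleneck()
feats = torch.randn(4, 512)      # CXR features from any pre-trained backbone
concepts, logits = model(feats)
# joint training: supervise both the concepts and the final label
loss = nn.BCELoss()(concepts, torch.randint(0, 2, (4, 8)).float()) \
     + nn.CrossEntropyLoss()(logits, torch.randint(0, 3, (4,)))
```

The concept vector is what the multi-agent RAG stage would then consume to ground the generated report.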
H. M. T. Alam, D. Srivastav, A. M. Selim, M. A. Kadir, M. M. H. Shuvo, and D. Sonntag, "CBM-RAG: Demonstrating Enhanced Interpretability in Radiology Report Generation with Multi-Agent RAG and Concept Bottleneck Models" in Proc. Companion Proceedings of the 17th ACM SIGCHI Symposium on Engineering Interactive Computing Systems. ACM SIGCHI Symposium on Engineering Interactive Computing Systems (EICS-2025), 17th ACM SIGCHI Symposium on Engineering Interactive Computing Systems, located at EICS-2025, June 23-27, Trier, Germany, 2025.
doi: 10.1145/3731406.3731970
@inproceedings{pub15937, series = {EICS '25 Companion},
abstract = {Advancements in generative Artificial Intelligence (AI) hold great promise for automating radiology workflows, yet challenges in interpretability and reliability hinder clinical adoption. This paper presents an automated radiology report generation framework that combines Concept Bottleneck Models (CBMs) with a Multi-Agent Retrieval-Augmented Generation (RAG) system to bridge AI performance with clinical explainability. CBMs map chest X-ray features to human-understandable clinical concepts, enabling transparent disease classification. Meanwhile, the RAG system integrates multi-agent collaboration and external knowledge to produce contextually rich, evidence-based reports. Our demonstration showcases the system’s ability to deliver interpretable predictions, mitigate hallucinations, and generate high-quality, tailored reports with an interactive interface addressing accuracy, trust, and usability challenges. This framework provides a pathway to improving diagnostic consistency and empowering radiologists with actionable insights.},
month = {6},
year = {2025},
title = {CBM-RAG: Demonstrating Enhanced Interpretability in Radiology Report Generation with Multi-Agent RAG and Concept Bottleneck Models},
booktitle = {Companion Proceedings of the 17th ACM SIGCHI Symposium on Engineering Interactive Computing Systems. ACM SIGCHI Symposium on Engineering Interactive Computing Systems (EICS-2025), 17th ACM SIGCHI Symposium on Engineering Interactive Computing Systems, located at EICS-2025, June 23-27, Trier, Germany},
pages = {59-61},
isbn = {9798400718663},
publisher = {Association for Computing Machinery},
doi = {https://doi.org/10.1145/3731406.3731970},
author = {Hasan Md Tusfiqur Alam and Devansh Srivastav and Abdulrahman Mohamed Selim and Md Abdul Kadir and Md Moktadirul Hoque Shuvo and Daniel Sonntag},
keywords = {Interpretable Radiology report generation, Disease classification, Medical imaging, Concept Bottleneck Models (CBM), Retrieval-Augmented Generation (RAG), Information Retrieval, VLMs, LLMs.},
url = {https://doi.org/10.1145/3731406.3731970 https://www.dfki.de/fileadmin/user_upload/import/15937_cbm_rag_eics_2025.pdf}
}
T. Maurer, A. M. Selim, H. M. T. Alam, M. Eiletz, M. Barz, and D. Sonntag, "InFL-UX: A Toolkit for Web-Based Interactive Federated Learning" in Proc. Companion Proceedings of the 17th ACM SIGCHI Symposium on Engineering Interactive Computing Systems. ACM SIGCHI Symposium on Engineering Interactive Computing Systems (EICS-2025), 17th ACM SIGCHI Symposium on Engineering Interactive Computing Systems, located at EICS-2025, June 23-27, Trier, Germany, 2025.
doi: 10.1145/3731406.3731972
@inproceedings{pub15938, series = {EICS '25 Companion},
abstract = {This paper presents InFL-UX, an interactive, proof-of-concept browser-based Federated Learning (FL) toolkit designed to integrate user contributions into the machine learning (ML) workflow. InFL-UX enables users across multiple devices to upload datasets, define classes, and collaboratively train classification models directly in the browser using modern web technologies. Unlike traditional FL toolkits, which often focus on backend simulations, InFL-UX provides a simple user interface for researchers to explore how users interact with and contribute to FL systems in real-world, interactive settings. InFL-UX bridges the gap between FL and interactive ML by prioritising usability and decentralised model training, empowering non-technical users to actively participate in ML classification tasks.},
month = {6},
year = {2025},
title = {InFL-UX: A Toolkit for Web-Based Interactive Federated Learning},
booktitle = {Companion Proceedings of the 17th ACM SIGCHI Symposium on Engineering Interactive Computing Systems. ACM SIGCHI Symposium on Engineering Interactive Computing Systems (EICS-2025), 17th ACM SIGCHI Symposium on Engineering Interactive Computing Systems, located at EICS-2025, June 23-27, Trier, Germany},
pages = {65-67},
isbn = {9798400718663},
publisher = {Association for Computing Machinery},
doi = {https://doi.org/10.1145/3731406.3731972},
author = {Tim Maurer and Abdulrahman Mohamed Selim and Hasan Md Tusfiqur Alam and Matthias Eiletz and Michael Barz and Daniel Sonntag},
keywords = {Federated Learning; Interactive Machine Learning; Browser-based Deep Learning},
url = {https://doi.org/10.1145/3731406.3731972 https://www.dfki.de/fileadmin/user_upload/import/15938_inFL-UX.pdf}
}
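The abstract does not specify InFL-UX's server-side aggregation; the classic FedAvg update, a plausible baseline for such a toolkit, averages client parameters weighted by local dataset size. A minimal numpy sketch under that assumption:

```python
import numpy as np

def fed_avg(client_weights, client_sizes):
    """FedAvg: weighted average of client model parameters.

    client_weights: one list of np.ndarrays per client (same shapes);
    client_sizes: number of local training examples per client.
    """
    total = sum(client_sizes)
    return [
        sum(w[i] * (n / total) for w, n in zip(client_weights, client_sizes))
        for i in range(len(client_weights[0]))
    ]

# three clients with different amounts of local data (toy parameters)
clients = [[np.random.default_rng(s).normal(size=(4, 2)), np.zeros(2)]
           for s in range(3)]
global_params = fed_avg(clients, client_sizes=[120, 40, 90])
```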
M. Barz, O. S. Bhatti, H. M. T. Alam, H. M. D. Nguyen, K. Altmeyer, S. Malone, and D. Sonntag, "eyeNotate: Interactive Annotation of Mobile Eye Tracking Data Based on Few-Shot Image Classification" Journal of Eye Movement Research (JEMR), vol. 18, iss. 4, 2025.
doi: 10.3390/jemr18040027
@article{pub15948, abstract = {Mobile eye tracking is an important tool in psychology and human-centered interaction design for understanding how people process visual scenes and user interfaces. However, analyzing recordings from head-mounted eye trackers, which typically include an egocentric video of the scene and a gaze signal, is a time-consuming and largely manual process. To address this challenge, we develop eyeNotate, a web-based annotation tool that enables semi-automatic data annotation and learns to improve from corrective user feedback. Users can manually map fixation events to areas of interest (AOIs) in a video-editing-style interface (baseline version). Further, our tool can generate fixation-to-AOI mapping suggestions based on a few-shot image classification model (IML-support version). We conduct an expert study with trained annotators (n = 3) to compare the baseline and IML-support versions. We measure the perceived usability, annotations' validity and reliability, and efficiency during a data annotation task. We asked our participants to re-annotate data from a single individual using an existing dataset (n = 48). Further, we conducted a semi-structured interview to understand how participants used the provided IML features and assessed our design decisions. In a post hoc experiment, we investigate the performance of three image classification models in annotating data of the remaining 47 individuals.},
number = {4},
month = {7},
year = {2025},
title = {eyeNotate: Interactive Annotation of Mobile Eye Tracking Data Based on Few-Shot Image Classification},
journal = {Journal of Eye Movement Research (JEMR)},
volume = {18},
pages = {1-35},
publisher = {MDPI},
doi = {https://doi.org/10.3390/jemr18040027},
author = {Michael Barz and Omair Shahzad Bhatti and Hasan Md Tusfiqur Alam and Ho Minh Duy Nguyen and Kristin Altmeyer and Sarah Malone and Daniel Sonntag},
keywords = {Eye Tracking; Interactive Machine Learning; Few-Shot Learning; Human-Computer Interaction},
url = {https://www.mdpi.com/1995-8692/18/4/27 https://www.dfki.de/fileadmin/user_upload/import/15948_jemr-18-00027.pdf}
}
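The fixation-to-AOI suggestions in the IML-support version rest on few-shot image classification. A nearest-class-prototype classifier is one minimal way to realize this; the embeddings below are random stand-ins for encoder outputs of crops around fixation points, and the AOI names are hypothetical:

```python
import numpy as np

def prototype_classifier(support_embs, support_labels, query_embs):
    """Nearest class-centroid (prototypical) few-shot classifier."""
    classes = sorted(set(support_labels))
    protos = np.stack([
        np.mean([e for e, l in zip(support_embs, support_labels) if l == c], axis=0)
        for c in classes
    ])
    # cosine similarity of each query to each class prototype
    q = query_embs / np.linalg.norm(query_embs, axis=1, keepdims=True)
    p = protos / np.linalg.norm(protos, axis=1, keepdims=True)
    return [classes[i] for i in (q @ p.T).argmax(axis=1)]

# toy usage: two AOIs, three labelled fixation crops each, four queries
rng = np.random.default_rng(0)
S, Q = rng.normal(size=(6, 16)), rng.normal(size=(4, 16))
print(prototype_classifier(S, ["screen"] * 3 + ["keyboard"] * 3, Q))
```

Corrective user feedback simply grows the support set, which is how such a model can improve without retraining a backbone.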
H. H. Le, H. M. D. Nguyen, O. S. Bhatti, L. Kopácsi, T. P. Ngo, B. T. Nguyen, M. Barz, and D. Sonntag, "I-MPN: inductive message passing network for efficient human-in-the-loop annotation of mobile eye tracking data" Scientific Reports (Sci Rep), vol. 15, iss. 1, 2025.
doi: 10.1038/s41598-025-94593-y
@article{pub15949, abstract = {Comprehending how humans process visual information in dynamic settings is crucial for psychology and designing user-centered interactions. While mobile eye-tracking systems combining egocentric video and gaze signals can offer valuable insights, manual analysis of these recordings is time-intensive. In this work, we present a novel human-centered learning algorithm designed for automated object recognition within mobile eye-tracking settings. Our approach seamlessly integrates an object detector with a spatial relation-aware inductive message-passing network (I-MPN), harnessing node profile information and capturing object correlations. Such mechanisms enable us to learn embedding functions capable of generalizing to new object angle views, facilitating rapid adaptation and efficient reasoning in dynamic contexts as users navigate their environment. Through experiments conducted on three distinct video sequences, our interactive-based method showcases significant performance improvements over fixed training/testing algorithms, even when trained on considerably smaller annotated samples collected through user feedback. Furthermore, we demonstrate exceptional efficiency in data annotation processes and surpass prior interactive methods that use complete object detectors, combine detectors with convolutional networks, or employ interactive video segmentation.},
number = {1},
month = {4},
year = {2025},
title = {I-MPN: inductive message passing network for efficient human-in-the-loop annotation of mobile eye tracking data},
journal = {Scientific Reports (Sci Rep)},
volume = {15},
pages = {1-17},
publisher = {Springer Nature},
doi = {https://doi.org/10.1038/s41598-025-94593-y},
author = {Hoang H. Le and Ho Minh Duy Nguyen and Omair Shahzad Bhatti and László Kopácsi and Thinh P. Ngo and Binh T. Nguyen and Michael Barz and Daniel Sonntag},
url = {https://doi.org/10.1038/s41598-025-94593-y https://www.dfki.de/fileadmin/user_upload/import/15949_s41598-025-94593-y.pdf}
}
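The message-passing mechanism at the heart of I-MPN can be illustrated with a single generic layer: mean-aggregate neighbour features along directed edges, then apply a linear update. This illustrates the mechanism only, not the I-MPN architecture itself; graph, features, and weights are toy assumptions:

```python
import numpy as np

def mpn_layer(node_feats, edges, W_self, W_nbr):
    """One message-passing update with mean aggregation and ReLU."""
    agg = np.zeros_like(node_feats)
    deg = np.zeros(len(node_feats))
    for src, dst in edges:              # messages flow along directed edges
        agg[dst] += node_feats[src]
        deg[dst] += 1
    agg[deg > 0] /= deg[deg > 0, None]  # mean over incoming messages
    return np.maximum(node_feats @ W_self + agg @ W_nbr, 0.0)

# toy graph: 4 detected objects with 8-d features, edges from spatial relations
rng = np.random.default_rng(0)
h = rng.normal(size=(4, 8))
h = mpn_layer(h, [(0, 1), (1, 0), (2, 3)],
              rng.normal(size=(8, 8)), rng.normal(size=(8, 8)))
```

Because the update is shared across nodes, it generalizes to unseen objects and viewpoints, which is the inductive property the paper exploits.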
S. Liang und D. Sonntag, "Advancing Biomedical Claim Verification by Using Large Language Models with Better Structured Prompting Strategies" in Proc. Proceedings of the 23rd Workshop on Biomedical Natural Language Processing. Workshop on Biomedical Natural Language Processing (BioNLP-2025), located at ACL 2025, August 1, Vienna, Austria, 2025.
@inproceedings{pub15963, abstract = {Biomedical claim verification involves determining the entailment relationship between a claim and evidence derived from medical studies or clinical trial reports (CTRs). In this work, we propose a structured four-step prompting strategy that explicitly guides large language models (LLMs) through (1) claim comprehension, (2) evidence analysis, (3) intermediate conclusion, and (4) entailment decision-making to improve the accuracy of biomedical claim verification. This strategy leverages compositional and human-like reasoning to enhance logical consistency and factual grounding to reduce reliance on memorizing few-shot exemplars and help LLMs generalize reasoning patterns across different biomedical claim verification tasks. Through extensive evaluation on biomedical NLI benchmarks, we analyze the individual contributions of each reasoning step. Our findings demonstrate that comprehension, evidence analysis, and intermediate conclusion each play distinct yet complementary roles. Systematic prompting and carefully designed step-wise instructions not only unlock the latent cognitive abilities of LLMs but also enhance interpretability by making it easier to trace errors and understand the model’s reasoning process. This research aims to improve the reliability of AI-driven biomedical claim verification.},
year = {2025},
title = {Advancing Biomedical Claim Verification by Using Large Language Models with Better Structured Prompting Strategies},
booktitle = {Proceedings of the 23rd Workshop on Biomedical Natural Language Processing. Workshop on Biomedical Natural Language Processing (BioNLP-2025), located at ACL 2025, August 1, Vienna, Austria},
publisher = {ACL Anthology},
author = {Siting Liang and Daniel Sonntag},
url = {https://www.dfki.de/fileadmin/user_upload/import/15963_Advancing_Biomedical_Claim_Verification_by_Using_Large_Language_Models_with_Better_Structured_Prompting_Strategies_camera_ready.pdf}
}
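The four-step prompting strategy is concrete enough to render as a single template. The step wording below paraphrases the abstract, and `llm` is a stand-in for any chat-completion callable (prompt in, text out), not a specific API:

```python
# The paper's four explicit reasoning steps as one prompt template
# (instruction wording paraphrased from the abstract).
PROMPT = """You are verifying a biomedical claim against evidence from a clinical trial report.

Claim: {claim}

Evidence: {evidence}

Work through the following steps, labelling each one:
1. Claim comprehension: restate what the claim asserts.
2. Evidence analysis: summarise what the evidence shows about each part of the claim.
3. Intermediate conclusion: state how the evidence bears on the claim.
4. Entailment decision: answer with exactly one label, "Entailment" or "Contradiction".
"""

def verify(llm, claim: str, evidence: str) -> str:
    """llm: any callable mapping a prompt string to a completion string."""
    return llm(PROMPT.format(claim=claim, evidence=evidence))
```

Keeping the steps explicit is also what makes errors traceable: a wrong verdict can be localized to the step whose intermediate output went astray.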
M. Barz, P. Karagiannis, J. Kildal, A. R. Pinto, J. R. de Munain, J. Rosel, M. Madarieta, K. Salagianni, P. Aivaliotis, S. Makris, and D. Sonntag, "MASTER-XR: Mixed Reality Ecosystem for Teaching Robotics in Manufacturing" in Integrated Systems: Data Driven Engineering, M.-R. Alam and M. Fathi, Eds., Springer, 2024.
doi: 10.1007/978-3-031-53652-6_10
@inbook{pub14702, abstract = {Many industries are transitioning to Industry 4.0 production models by adopting robots in their manufacturing processes. In parallel, Extended Reality (XR) technologies have reached sufficient maturity to enter the industrial applications domain, with early success cases often related to training workers, remote assistance, access to contextual information, and interaction with digital twins. In the future, robots will be increasingly enhanced with XR applications, which requires that industrial workers understand both technologies and use and control hybrid solutions confidently. Specific education and training programs will be essential to this transition, especially for vocational school students and professionals in upskilling. They must learn how to program robots and establish a safe and productive human-robot collaboration. The new EU-funded project MASTER will improve the XR ecosystem for teaching and training robotics in manufacturing by providing an open XR platform that integrates key functionalities like creating safe robotic environments, programming flexible robotic applications, and integrating advanced interaction mechanisms based on eye tracking. It will also provide high-quality training materials for robotics. We report on the project plan, our objectives, and milestones.},
year = {2024},
title = {MASTER-XR: Mixed Reality Ecosystem for Teaching Robotics in Manufacturing},
booktitle = {Integrated Systems: Data Driven Engineering},
editor = {Mohammad-Reza Alam and Madjid Fathi},
pages = {167-182},
isbn = {978-3-031-53652-6},
publisher = {Springer},
doi = {https://doi.org/10.1007/978-3-031-53652-6_10},
author = {Michael Barz and Panagiotis Karagiannis and Johan Kildal and Andoni Rivera Pinto and Judit Ruiz de Munain and Jesús Rosel and Maria Madarieta and Konstantina Salagianni and Panagiotis Aivaliotis and Sotiris Makris and Daniel Sonntag},
keywords = {Industry 4.0, Extended Reality (XR), Robotics, Worker Training, Manufacturing, Human-Robot Collaboration, Eye Tracking},
status_notes = {to appear},
url = {https://www.dfki.de/fileadmin/user_upload/import/14702_MASTER_XR_-_ISDT_23_-_Preprint.pdf https://link.springer.com/chapter/10.1007/978-3-031-53652-6_10}
}
K. Bengler, W. Damm, A. Luedtke, J. Rieger, B. Austel, B. Biebl, M. Fränzle, W. Hagemann, M. Held, D. Hess, K. Ihme, S. Kacianka, A. J. Kerscher, F. Laine, S. Lehnhoff, A. Pretschner, A. Rakow, D. Sonntag, J. Sztipanovits, M. Schwammberger, M. Schweda, A. Unni, and E. Veith, "A Reference Architecture for Human Cyber-Physical Systems, Part II: Fundamental Design Principles for Human-CPS Interaction" ACM Transactions on Cyber-Physical Systems (TCPS), vol. 8, 2024.
@article{pub14704, abstract = {As automation increases qualitatively and quantitatively in safety-critical human cyber-physical systems, it is becoming more and more challenging to increase the probability or ensure that human operators still perceive key artifacts and comprehend their roles in the system. In the companion paper, we proposed an abstract reference architecture capable of expressing all classes of system-level interactions in human cyber-physical systems. Here we demonstrate how this reference architecture supports the analysis of levels of communication between agents and helps to identify the potential for misunderstandings and misconceptions. We then develop a metamodel for safe human machine interaction. Therefore, we ask what type of information exchange must be supported on what level so that humans and systems can cooperate as a team, what is the criticality of exchanged information, what are timing requirements for such interactions, and how can we communicate highly critical information in a limited time frame in spite of the many sources of a distorted perception. We highlight shared stumbling blocks and illustrate shared design principles, which rest on established ontologies specific to particular application classes. In order to overcome the partial opacity of internal states of agents, we anticipate a key role of virtual twins of both human and technical cooperation partners for designing a suitable communication.},
month = {1},
year = {2024},
title = {A Reference Architecture for Human Cyber-Physical Systems, Part II: Fundamental Design Principles for Human-CPS Interaction},
journal = {ACM Transactions on Cyber-Physical Systems (TCPS)},
volume = {8},
pages = {1-27},
publisher = {ACM},
author = {Klaus Bengler and Werner Damm and Andreas Luedtke and Jochem Rieger and Benedikt Austel and Bianca Biebl and Martin Fränzle and Willem Hagemann and Moritz Held and David Hess and Klas Ihme and Severin Kacianka and Alyssa J Kerscher and Forrest Laine and Sebastian Lehnhoff and Alexander Pretschner and Astrid Rakow and Daniel Sonntag and Janos Sztipanovits and Maike Schwammberger and Mark Schweda and Anirudh Unni and Eric Veith},
url = {https://scholar.google.de/citations?view_op=view_citation&hl=en&user=v7i6Uz4AAAAJ&sortby=pubdate&citation_for_view=v7i6Uz4AAAAJ:IUKN3-7HHlwC https://www.dfki.de/fileadmin/user_upload/import/14704_A_references_Architecture_for_Human_Cyber_Physical_Systems_Part_II.pdf}
}
W. Damm, D. Hess, M. Schweda, J. Sztipanovits, K. Bengler, B. Biebl, M. Fränzle, W. Hagemann, M. Held, K. Ihme, S. Kacianka, A. J. Kerscher, S. Lehnhoff, A. Luedtke, A. Pretschner, A. Rakow, J. Rieger, D. Sonntag, M. Schwammberger, B. Austel, A. Unni, and E. Veith, "A Reference Architecture of Human Cyber-Physical Systems – Part I: Fundamental Concepts" ACM Transactions on Cyber-Physical Systems (TCPS), vol. 8, 2024.
@article{pub14705, abstract = {We propose a reference architecture of safety-critical or industry-critical human cyber-physical systems (CPSs) capable of expressing essential classes of system-level interactions between CPS and humans relevant for the societal acceptance of such systems. To reach this quality gate, the expressivity of the model must go beyond classical viewpoints such as operational, functional, and architectural views and views used for safety and security analysis. The model does so by incorporating elements of such systems for mutual introspections in situational awareness, capabilities, and intentions to enable a synergetic, trusted relation in the interaction of humans and CPSs, which we see as a prerequisite for their societal acceptance. The reference architecture is represented as a metamodel incorporating conceptual and behavioral semantic aspects. We illustrate the key concepts of the metamodel with examples from cooperative autonomous driving, the operating room of the future, cockpit-tower interaction, and crisis management.},
month = {1},
year = {2024},
title = {A Reference Architecture of Human Cyber-Physical Systems – Part I: Fundamental Concepts},
journal = {ACM Transactions on Cyber-Physical Systems (TCPS)},
volume = {8},
pages = {1-32},
publisher = {ACM},
author = {Werner Damm and David Hess and Mark Schweda and Janos Sztipanovits and Klaus Bengler and Bianca Biebl and Martin Fränzle and Willem Hagemann and Moritz Held and Klas Ihme and Severin Kacianka and Alyssa J Kerscher and Sebastian Lehnhoff and Andreas Luedtke and Alexander Pretschner and Astrid Rakow and Jochem Rieger and Daniel Sonntag and Maike Schwammberger and Benedikt Austel and Anirudh Unni and Eric Veith},
url = {https://scholar.google.de/citations?view_op=view_citation&hl=en&user=v7i6Uz4AAAAJ&sortby=pubdate&citation_for_view=v7i6Uz4AAAAJ:mlAyqtXpCwEC https://www.dfki.de/fileadmin/user_upload/import/14705_A_Reference_Architecture_of_Human_Cyber-Physical_Systems_Part_I.pdf}
}
S. Liang, P. V. Sánchez, and D. Sonntag, "Optimizing Relation Extraction in Medical Texts through Active Learning: A Comparative Analysis of Trade-offs" in Proc. Association for Computational Linguistics. Conference of the European Chapter of the Association for Computational Linguistics (EACL-2024), March 17-22, St. Julians, Malta, 2024.
@inproceedings{pub14721, abstract = {Our work explores the effectiveness of employing Clinical BERT for Relation Extraction (RE) tasks in medical texts within an Active Learning (AL) framework. Our main objective is to optimize RE in medical texts through AL while examining the trade-offs between performance and computation time, comparing it with alternative methods like Random Forest and BiLSTM networks. Comparisons extend to feature engineering requirements, performance metrics, and considerations of annotation costs, including AL step times and annotation rates. The utilization of AL strategies aligns with our broader goal of enhancing the efficiency of relation classification models, particularly when dealing with the challenges of annotating complex medical texts in a Human-in-the-Loop (HITL) setting. The results indicate that uncertainty-based sampling achieves comparable performance with significantly fewer annotated samples across three categories of supervised learning methods, thereby reducing annotation costs for clinical and biomedical corpora. While Clinical BERT exhibits clear performance advantages across two different corpora, the trade-off involves longer computation times in interactive annotation processes. In real-world applications, where practical feasibility and timely results are crucial, optimizing this trade-off becomes imperative.},
year = {2024},
title = {Optimizing Relation Extraction in Medical Texts through Active Learning: A Comparative Analysis of Trade-offs},
booktitle = {Association for Computational Linguistics. Conference of the European Chapter of the Association for Computational Linguistics (EACL-2024), March 17-22, St. Julians, Malta},
publisher = {ACL Anthology},
author = {Siting Liang and Pablo Valdunciel Sánchez and Daniel Sonntag},
url = {https://aclanthology.org/2024.uncertainlp-1.3/ https://www.dfki.de/fileadmin/user_upload/import/14721_Optimizing_Relation_Extraction_in_Medical_Texts_through_Active_Learning.pdf}
}
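Uncertainty-based sampling, the strategy this paper reports as matching full-data performance with far fewer annotations, fits in a short loop. The sketch below uses a linear classifier and simulated labels to stay self-contained; the paper itself compares Clinical BERT, BiLSTM, and Random Forest learners:

```python
import numpy as np
from sklearn.linear_model import LogisticRegression

def uncertainty_sampling(model, X_pool, k=10):
    """Pick the k pool items the classifier is least confident about."""
    conf = model.predict_proba(X_pool).max(axis=1)
    return np.argsort(conf)[:k]     # lowest top-class probability first

rng = np.random.default_rng(0)
X_lab, y_lab = rng.normal(size=(20, 5)), rng.integers(0, 2, 20)
X_pool = rng.normal(size=(200, 5))

for _ in range(3):                  # a few active learning rounds
    clf = LogisticRegression().fit(X_lab, y_lab)
    ask = uncertainty_sampling(clf, X_pool)
    # in a HITL setting an annotator labels `ask`; here labels are simulated
    X_lab = np.vstack([X_lab, X_pool[ask]])
    y_lab = np.concatenate([y_lab, rng.integers(0, 2, len(ask))])
    X_pool = np.delete(X_pool, ask, axis=0)
```

The trade-off the paper quantifies lives in the `fit` call: a BERT-sized model makes each round slower, which matters when an annotator is waiting.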
W. Damm, M. Fränzle, A. J. Kerscher, F. Laine, K. Bengler, B. Biebl, W. Hagemann, M. Held, D. Hess, K. Ihme, S. Kacianka, S. Lehnhoff, A. Lüdtke, A. Pretschner, A. Rakow, J. W. Rieger, D. Sonntag, J. Sztipanovits, M. Schwammberger, M. Schweda, A. Trende, A. Unni, und E. M. S. P. Veith, "A Reference Architecture of Human Cyber-Physical Systems - Part III: Semantic Foundations" ACM Transactions on Cyber-Physical Systems (TCPS), vol. 8, iss. 1.
2024.
doi: 10.1145/3622881
@article{pub14732, abstract = {The design and analysis of multi-agent human cyber-physical systems in safety-critical or industry-critical domains calls for an adequate semantic foundation capable of exhaustively and rigorously describing all emergent effects in the joint dynamic behavior of the agents that are relevant to their safety and well-behavior. We present such a semantic foundation. This framework extends beyond previous approaches by extending the agent-local dynamic state beyond state components under direct control of the agent and belief about other agents (as previously suggested for understanding cooperative as well as rational behavior) to agent-local evidence and belief about the overall cooperative, competitive, or coopetitive game structure. We argue that this extension is necessary for rigorously analyzing systems of human cyber-physical systems because humans are known to employ cognitive replacement models of system dynamics that are both non-stationary and potentially incongruent. These replacement models induce visible and potentially harmful effects on their joint emergent behavior and the interaction with cyber-physical system components.},
number = {1},
year = {2024},
title = {A Reference Architecture of Human Cyber-Physical Systems - Part III: Semantic Foundations},
journal = {ACM Transactions on Cyber-Physical Systems (TCPS)},
volume = {8},
pages = {1-23},
publisher = {ACM},
doi = {https://doi.org/10.1145/3622881},
author = {Werner Damm and Martin Fränzle and Alyssa J. Kerscher and Forrest Laine and Klaus Bengler and Bianca Biebl and Willem Hagemann and Moritz Held and David Hess and Klas Ihme and Severin Kacianka and Sebastian Lehnhoff and Andreas Lüdtke and Alexander Pretschner and Astrid Rakow and Jochem W. Rieger and Daniel Sonntag and Janos Sztipanovits and Maike Schwammberger and Mark Schweda and Alexander Trende and Anirudh Unni and Eric M. S. P. Veith},
url = {https://dl.acm.org/doi/10.1145/3622881 https://www.dfki.de/fileadmin/user_upload/import/14732_A_Reference_Architecture_of_Human_Cyber-Physical_Part_III.pdf}
}
H. Kath, P. P. Serafini, I. B. Campos, T. Gouvea, and D. Sonntag, "Leveraging Transfer Learning and Active Learning for Sound Event Detection in Passive Acoustic Monitoring of Wildlife" in Proc. 3rd Annual AAAI Workshop on AI to Accelerate Science and Engineering. AAAI Workshop on AI to Accelerate Science and Engineering (AI2ASE-2024), located at AAAI, February 26, Vancouver, BC, Canada, 2024.
@inproceedings{pub14737, abstract = {Passive Acoustic Monitoring (PAM) has emerged as a pivotal technology for wildlife monitoring, generating vast amounts of acoustic data. However, the successful application of machine learning methods for sound event detection in PAM datasets heavily relies on the availability of annotated data, which can be laborious to acquire. In this study, we investigate the effectiveness of transfer learning and active learning techniques to address the data annotation challenge in PAM. Transfer learning allows us to use pre-trained models from related tasks or datasets to bootstrap the learning process for sound event detection. Furthermore, active learning promises strategic selection of the most informative samples for annotation, effectively reducing the annotation cost and improving model performance. We evaluate an approach that combines transfer learning and active learning to efficiently exploit existing annotated data and optimize the annotation process for PAM datasets. Our transfer learning observations show that embeddings produced by BirdNet, a model trained on high signal-to-noise recordings of bird vocalisations, can be effectively used for predicting anurans in PAM data: a linear classifier constructed using these embeddings outperforms the benchmark by 21.7%. Our results indicate that active learning is superior to random sampling, although no clear winner emerges among the strategies employed. The proposed method holds promise for facilitating broader adoption of machine learning techniques in PAM and advancing our understanding of biodiversity dynamics through acoustic data analysis.},
year = {2024},
title = {Leveraging Transfer Learning and Active Learning for Sound Event Detection in Passive Acoustic Monitoring of Wildlife},
booktitle = {3rd Annual AAAI Workshop on AI to Accelerate Science and Engineering. AAAI Workshop on AI to Accelerate Science and Engineering (AI2ASE-2024), located at AAAI, February 26, Vancouver, BC, Canada},
publisher = {o.A.},
author = {Hannes Kath and Patricia P. Serafini and Ivan Braga Campos and Thiago Gouvea and Daniel Sonntag},
url = {https://ai-2-ase.github.io/papers/35%5cCameraReady.pdf https://www.dfki.de/fileadmin/user_upload/import/14737_Kath_et_al_2024_Leveraging_Transfer_Learning_and_Active_Learning_for_Sound_Event_Detection_in.pdf}
}
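The transfer-learning result (a linear classifier on BirdNET embeddings beating the benchmark) corresponds to a standard recipe. In the sketch, the 1024-dimensional embeddings are random stand-ins for actual BirdNET outputs, and the labels mark anuran presence per clip:

```python
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# emb: pre-computed BirdNET embeddings for PAM clips (random stand-ins
# here; real ones come from running BirdNET on the audio),
# y: presence/absence of an anuran call per clip.
rng = np.random.default_rng(0)
emb = rng.normal(size=(500, 1024))
y = rng.integers(0, 2, 500)

X_tr, X_te, y_tr, y_te = train_test_split(emb, y, test_size=0.2, random_state=0)
clf = LogisticRegression(max_iter=1000).fit(X_tr, y_tr)   # linear probe
print("held-out accuracy:", clf.score(X_te, y_te))
```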
B. Lüers, P. P. Serafini, I. B. Campos, T. Gouvea, and D. Sonntag, "BirdNET-Annotator: AI-Assisted Strong Labelling of Bird Sound Datasets" in Proc. 3rd Annual AAAI Workshop on AI to Accelerate Science and Engineering. AAAI Workshop on AI to Accelerate Science and Engineering (AI2ASE), located at AAAI, February 26, Vancouver, BC, Canada, 2024.
@inproceedings{pub14738, abstract = {Monitoring biodiversity in biosphere reserves is challenging due to the vast regions to be monitored. Thus, conservationists have resorted to employing passive acoustic monitoring (PAM), which automates the audio recording process. PAM can create large, unlabeled datasets, but deriving knowledge from such recordings is usually still done manually. Machine learning enables the detection of vocalizations of species automatically, making it possible to summarize the biodiversity in an area in terms of species richness. While pre-trained neural network models for bird vocalization detection exist, they are often not reliable enough to do away with the need for manual labeling of audio files. In this paper, we present BirdNET-Annotator, a tool for AI-assisted labeling of audio datasets co-developed by ecoacoustics and ML experts. BirdNET-Annotator runs in the cloud free of charge, enabling end users to scale beyond the limitations of their local hardware. We evaluated the performance of our solution in the context of its intended workflow and found a reduction in annotation times. While our results show that our application now meets the user requirements, there are still opportunities to seize for additional performance and usability improvement. Our application illustrates how large, pre-trained neural models can be integrated into the workflow of domain experts when packaged in a user-friendly manner. We observe that although our solution adds a step to the preexisting workflow, the overall annotation speed is significantly improved. This hints at further improvement to be realized in the future by consolidating more steps of the workflow into fewer tools.},
year = {2024},
title = {BirdNET-Annotator: AI-Assisted Strong Labelling of Bird Sound Datasets},
booktitle = {3rd Annual AAAI Workshop on AI to Accelerate Science and Engineering. AAAI Workshop on AI to Accelerate Science and Engineering (AI2ASE), located at AAAI, February 26, Vancouver, BC, Canada},
publisher = {o.A.},
author = {Bengt Lüers and Patricia P. Serafini and Ivan Braga Campos and Thiago Gouvea and Daniel Sonntag},
url = {https://ai-2-ase.github.io/papers/15%5cCameraReady%5c20231211_AI2ASE_AAAI_BirdNET_Annotator_CameraReady.pdf https://www.dfki.de/fileadmin/user_upload/import/14738_Lueers_et_al_2024_BirdNET-Annotator.pdf}
}
I. Troshani, T. Gouvea, and D. Sonntag, "Leveraging Sound Collections for Animal Species Classification with Weakly Supervised Learning" in Proc. 3rd Annual AAAI Workshop on AI to Accelerate Science and Engineering. AAAI Workshop on AI to Accelerate Science and Engineering (AI2ASE-2024), located at AAAI, February 26, Vancouver, BC, Canada, 2024.
@inproceedings{pub14739, abstract = {The utilization of Passive Acoustic Monitoring (PAM) for wildlife monitoring remains hindered by the challenge of data analysis. While numerous supervised ML algorithms exist, their application is constrained by the scarcity of annotated data. Expert-curated sound collections are valuable knowledge sources that could bridge this gap. However, their utilization is hindered by the sporadic sounds to be identified in these recordings. In this study, we propose a weakly supervised approach to tackle this challenge and assess its performance using the AnuraSet dataset. We employ TALNet, a Convolutional Recurrent Neural Network (CRNN) model and train it on 60-second sound recordings labeled for the presence of 42 different anuran species. We conduct the evaluation on 1-second segments, enabling precise sound event localization. Furthermore, we investigate the impact of varying the length of the training input and explore different pooling functions’ effects on TALNet’s performance on AnuraSet. Our findings demonstrate the effectiveness of TALNet in harnessing weakly annotated sound collections for wildlife monitoring.},
year = {2024},
title = {Leveraging Sound Collections for Animal Species Classification with Weakly Supervised Learning},
booktitle = {3rd Annual AAAI Workshop on AI to Accelerate Science and Engineering. AAAI Workshop on AI to Accelerate Science and Engineering (AI2ASE-2024), located at AAAI, February 26, Vancouver, BC, Canada},
publisher = {o.A.},
author = {Ilira Troshani and Thiago Gouvea and Daniel Sonntag},
url = {https://ai-2-ase.github.io/papers/19%5cCameraReady.pdf https://www.dfki.de/fileadmin/user_upload/import/14739_Troshani_et_al_2024_Leveraging_Sound_Collections_for_Animal_Species_Classification_with_Weakly.pdf}
}
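Since clip-level ("weak") labels must be reconciled with frame-level predictions, the pooling function is central to this setup. A small sketch of three standard choices from the weakly supervised sound event detection literature (which of them the paper evaluates on AnuraSet is detailed in the paper itself):

    import numpy as np

    def max_pool(p):             # clip probability = most confident frame
        return p.max(axis=0)

    def mean_pool(p):            # clip probability = average over all frames
        return p.mean(axis=0)

    def linear_softmax_pool(p):  # frames weighted by their own probability
        return (p * p).sum(axis=0) / p.sum(axis=0)

    rng = np.random.default_rng(0)
    frame_probs = rng.random((60, 42))   # 60 frames x 42 anuran species
    for pool in (max_pool, mean_pool, linear_softmax_pool):
        print(pool.__name__, pool(frame_probs)[:3])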
M. A. Kadir, H. M. T. Alam, P. Maul, H. Profitlich, M. Wolf, and D. Sonntag, Modular Deep Active Learning Framework for Image Annotation: A Technical Report for the Ophthalmo-AI Project.
@misc{pub14772, abstract = {Image annotation is one of the most essential tasks for guaranteeing proper treatment for patients and tracking progress over the course of therapy in the field of medical imaging and disease diagnosis. However, manually annotating a lot of 2D and 3D imaging data can be extremely tedious. Deep Learning (DL) based segmentation algorithms have completely transformed this process and made it possible to automate image segmentation. By accurately segmenting medical images, these algorithms can greatly minimize the time and effort necessary for manual annotation. Additionally, by incorporating Active Learning (AL) methods, these segmentation algorithms can perform far more effectively with a smaller amount of ground truth data. We introduce MedDeepCyleAL, an end-to-end framework implementing the complete AL cycle. It provides researchers with the flexibility to choose the type of deep learning model they wish to employ and includes an annotation tool that supports the classification and segmentation of medical images. The user-friendly interface allows for easy alteration of the AL and DL model settings through a configuration file, requiring no prior programming experience. While MedDeepCyleAL can be applied to any kind of image data, we have specifically applied it to ophthalmology data in this project.},
month = {3},
year = {2024},
title = {Modular Deep Active Learning Framework for Image Annotation: A Technical Report for the Ophthalmo-AI Project},
author = {Md Abdul Kadir and Hasan Md Tusfiqur Alam and Pascale Maul and Hans-Jürgen Profitlich and Moritz Wolf and Daniel Sonntag},
url = {https://arxiv.org/abs/2403.15143 https://www.dfki.de/fileadmin/user_upload/import/14772_2403.15143.pdf}
}
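A hedged skeleton of the annotate-train-select cycle that such a framework automates; every name below is illustrative and not the actual MedDeepCyleAL API, which is configured through a file rather than code:

    from dataclasses import dataclass

    @dataclass
    class ALConfig:               # stands in for the framework's configuration file
        rounds: int = 5
        query_size: int = 8
        strategy: str = "entropy"

    def run_al_cycle(cfg, train_fn, query_fn, annotate_fn, pool):
        """Generic AL loop: retrain, query informative images, annotate, repeat."""
        labelled, model = [], None
        for _ in range(cfg.rounds):
            model = train_fn(labelled)            # (re)train the segmentation model
            batch = query_fn(model, pool, cfg)    # rank the pool by informativeness
            labelled += annotate_fn(batch)        # human labels via the annotation tool
            pool = [x for x in pool if x not in batch]
        return model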
S. Liang, H. Profitlich, M. Klass, N. Möller-Grell, C. Bergmann, S. Heim, C. Niklas, and D. Sonntag, "Building A German Clinical Named Entity Recognition System without In-domain Training Data" in Proc. Association for Computational Linguistics. Meeting of the North American Chapter of the Association for Computational Linguistics (NAACL-2024), June 17-21, Mexico City, Mexico, 2024.
@inproceedings{pub14838, abstract = {Clinical Named Entity Recognition (NER) is essential for extracting important medical insights from clinical narratives. Given the challenges in obtaining expert training datasets for real-world clinical applications related to data protection regulations and the lack of standardised entity types, this work represents a collaborative initiative aimed at building a German clinical NER system with a focus on addressing these obstacles effectively. In response to the challenge of training data scarcity, we propose a \textbf{Conditional Relevance Learning (CRL)} approach in low-resource transfer learning scenarios. \textbf{CRL} effectively leverages a pre-trained language model and domain-specific open resources, enabling the acquisition of a robust base model tailored for clinical NER tasks, particularly in the face of changing label sets. This flexibility empowers the implementation of a \textbf{Multilayered Semantic Annotation (MSA)} schema in our NER system, capable of organizing a diverse array of entity types, thus significantly boosting the NER system's adaptability and utility across various clinical domains. In the case study, we demonstrate how our NER system can be applied to overcome resource constraints and comply with data privacy regulations. Lacking prior training on in-domain data, feedback from expert users in respective domains is essential in identifying areas for system refinement. Future work will focus on the integration of expert feedback to improve system performance in specific clinical contexts.},
year = {2024},
title = {Building A German Clinical Named Entity Recognition System without In-domain Training Data},
booktitle = {Association for Computational Linguistics. Meeting of the North American Chapter of the Association for Computational Linguistics (NAACL-2024), June 17-21, Mexico City, Mexico},
publisher = {ACL Anthology},
author = {Siting Liang and Hans-Jürgen Profitlich and Maximilian Klass and Niko Möller-Grell and Celine-Fabienne Bergmann and Simon Heim and Christian Niklas and Daniel Sonntag},
keywords = {Clinical NLP, Named Entity Recognition, Low-resource Language, Transfer Learning},
url = {https://aclanthology.org/2024.clinicalnlp-1.7/ https://www.dfki.de/fileadmin/user_upload/import/14838_Building_A_German_NER_without_In_domain_Training_Data__clinical_NLP__(1).pdf}
}
A. M. Selim, M. Rekrut, M. Barz, and D. Sonntag, "Speech Imagery BCI Training Using Game with a Purpose" in Proc. Proceedings of the 2024 International Conference on Advanced Visual Interfaces. International Working Conference on Advanced Visual Interfaces (AVI-2024), June 3-7, Arenzano, Genoa, Italy, 2024.
doi: 10.1145/3656650.3656654
@inproceedings{pub14962, abstract = {Games are used in multiple fields of brain-computer interface (BCI) research and applications to improve participants’ engagement and enjoyment during electroencephalogram (EEG) data collection. However, despite potential benefits, no current studies have reported on implemented games for Speech Imagery BCI. Imagined speech is speech produced without audible sounds or active movement of the articulatory muscles. Collecting imagined speech EEG data is a time-consuming, mentally exhausting, and cumbersome process, which requires participants to read words off a computer screen and produce them as imagined speech. To improve this process for study participants, we implemented a maze-like game where a participant navigated a virtual robot capable of performing five actions that represented our words of interest while we recorded their EEG data. The study setup was evaluated with 15 participants. Based on their feedback, the game improved their engagement and enjoyment while resulting in a 69.10% average classification accuracy using a random forest classifier.},
month = {6},
year = {2024},
title = {Speech Imagery BCI Training Using Game with a Purpose},
booktitle = {Proceedings of the 2024 International Conference on Advanced Visual Interfaces. International Working Conference on Advanced Visual Interfaces (AVI-2024), June 3-7, Arenzano, Genoa, Italy},
pages = {1-5},
isbn = {9798400717642},
publisher = {Association for Computing Machinery, New York, NY, USA},
doi = {https://doi.org/10.1145/3656650.3656654},
author = {Abdulrahman Mohamed Selim and Maurice Rekrut and Michael Barz and Daniel Sonntag},
keywords = {BCI, EEG, Game with a purpose (GWAP), Imagined speech, User study},
url = {https://doi.org/10.1145/3656650.3656654 https://www.dfki.de/fileadmin/user_upload/import/14962_avi2024-4.pdf}
}
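For illustration only, the classification step reported above (a random forest over EEG windows for five imagined words) might look as follows; the feature extraction, which the abstract does not specify, is replaced by random vectors, so the printed accuracy is chance level rather than the paper's 69.10%:

    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import cross_val_score

    rng = np.random.default_rng(42)
    X = rng.normal(size=(500, 64))     # 500 EEG windows x 64 assumed features
    y = rng.integers(0, 5, size=500)   # five imagined words / robot actions

    clf = RandomForestClassifier(n_estimators=200, random_state=42)
    print("CV accuracy:", cross_val_score(clf, X, y, cv=5).mean())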
A. M. Selim, M. Barz, O. S. Bhatti, H. M. T. Alam, and D. Sonntag, "A review of machine learning in scanpath analysis for passive gaze-based interaction" Frontiers in Artificial Intelligence (Front. Artif. Intell.), vol. 7.
2024.
doi: 10.3389/frai.2024.1391745
@article{pub14976, abstract = {The scanpath is an important concept in eye tracking. It refers to a person's eye movements over a period of time, commonly represented as a series of alternating fixations and saccades. Machine learning has been increasingly used for the automatic interpretation of scanpaths over the past few years, particularly in research on passive gaze-based interaction, i.e., interfaces that implicitly observe and interpret human eye movements, with the goal of improving the interaction. This literature review investigates research on machine learning applications in scanpath analysis for passive gaze-based interaction between 2012 and 2022, starting from 2,425 publications and focussing on 77 publications. We provide insights on research domains and common learning tasks in passive gaze-based interaction and present common machine learning practices from data collection and preparation to model selection and evaluation. We discuss commonly followed practices and identify gaps and challenges, especially concerning emerging machine learning topics, to guide future research in the field.},
month = {6},
year = {2024},
title = {A review of machine learning in scanpath analysis for passive gaze-based interaction},
editor = {Maria Chiara Caschera},
journal = {Frontiers in Artificial Intelligence (Front. Artif. Intell.)},
volume = {7},
pages = {1-28},
publisher = {Frontiers Media SA, Avenue du Tribunal-Fédéral 34, 1005 Lausanne, Switzerland},
doi = {https://doi.org/10.3389/frai.2024.1391745},
author = {Abdulrahman Mohamed Selim and Michael Barz and Omair Shahzad Bhatti and Hasan Md Tusfiqur Alam and Daniel Sonntag},
keywords = {machine learning, eye tracking, scanpath, passive gaze-based interaction, literature review},
url = {https://www.dfki.de/fileadmin/user_upload/import/14976_frai-07-1391745.pdf https://www.frontiersin.org/journals/artificial-intelligence/articles/10.3389/frai.2024.1391745/full}
}
D. Sonntag, M. Barz, and T. Gouvea, "A look under the hood of the Interactive Deep Learning Enterprise (No-IDLE)" German Research Center for AI, 2024.
@techreport{pub15026, series = {DFKI Technical Report},
abstract = {This DFKI technical report presents the anatomy of the No-IDLE prototype system (funded by the German Federal Ministry of Education and Research) that provides not only basic and fundamental research in interactive machine learning, but also reveals deeper insights into users' behaviours, needs, and goals. Machine learning and deep learning should become accessible to millions of end users. No-IDLE's goals and scientific challenges centre around the desire to increase the reach of interactive deep learning solutions for non-experts in machine learning. One of the key innovations described in this technical report is a methodology for interactive machine learning combined with multimodal interaction which will become central when we start interacting with semi-intelligent machines in the upcoming era of neural networks and large language models.},
month = {6},
year = {2024},
title = {A look under the hood of the Interactive Deep Learning Enterprise (No-IDLE)},
volume = {-},
institution = {German Research Center for AI},
author = {Daniel Sonntag and Michael Barz and Thiago Gouvea},
url = {https://arxiv.org/abs/2406.19054 https://www.dfki.de/fileadmin/user_upload/import/15026_2406.19054v1.pdf}
}
H. Kath, P. P. Serafini, I. B. Campos, T. Gouvea, and D. Sonntag, "Demo: Enhancing Wildlife Acoustic Data Annotation Efficiency through Transfer and Active Learning" in Proc. Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence. International Joint Conference on Artificial Intelligence (IJCAI-2024), August 3-9, Jeju, Korea, Republic of, 2024.
@inproceedings{pub15055, abstract = {Passive Acoustic Monitoring (PAM) has become a key technology in wildlife monitoring, generating large amounts of acoustic data. However, the effective application of machine learning methods for sound event detection in PAM datasets is highly dependent on the accessibility of annotated data, a process that can be labour intensive. As a team of domain experts and machine learning researchers, in this paper we present a no-code annotation tool designed for PAM datasets that incorporates transfer learning and active learning strategies to address the data annotation challenge inherent in PAM. Transfer learning is applied to use pre-trained models to compute meaningful embeddings from the PAM audio files. Active learning iteratively identifies the most informative samples and then presents them to the user for annotation. This iterative approach improves the performance of the model compared to random sample selection. In a preliminary evaluation of the tool, a domain expert annotated part of a real PAM data set. Compared to conventional tools, the workflow of the proposed tool showed a speed improvement of 2-4 times. Further enhancements, such as the incorporation of sound examples, have the potential to further improve efficiency.},
month = {8},
year = {2024},
title = {Demo: Enhancing Wildlife Acoustic Data Annotation Efficiency through Transfer and Active Learning},
booktitle = {Proceedings of the Thirty-Third International Joint Conference on Artificial Intelligence. International Joint Conference on Artificial Intelligence (IJCAI-2024), August 3-9, Jeju, Korea, Republic of},
publisher = {International Joint Conferences on Artificial Intelligence},
author = {Hannes Kath and Patricia P. Serafini and Ivan B. Campos and Thiago Gouvea and Daniel Sonntag},
url = {https://www.dfki.de/fileadmin/user_upload/import/15055_Demo_TransferLearning_ActiveLearning_Bioacoustic.pdf https://www.ijcai.org/proceedings/2024/1010}
}
H. Kath, T. Gouvea, and D. Sonntag, "Active Learning in Multi-label Classification of Bioacoustic Data" in Proc. KI 2024: Advances in Artificial Intelligence. German Conference on Artificial Intelligence (KI-2024), 47th German Conference on AI, September 25-27, Würzburg, Germany, 2024.
@inproceedings{pub15056, abstract = {Passive Acoustic Monitoring (PAM) has become a key technology in wildlife monitoring, providing vast amounts of acoustic data. The recording process naturally generates multi-label datasets; however, due to the significant annotation time required, most available datasets use exclusive labels. While active learning (AL) has shown the potential to speed up the annotation process of multi-label PAM data, it lacks standardized performance metrics across experimental setups. We present a novel performance metric for AL, the `speedup factor', which remains constant across experimental setups. It quantifies the fraction of samples required by an AL strategy compared to random sampling to achieve equivalent model performance. Using two multi-label PAM datasets, we investigate the effects of class sparsity, ceiling performance, number of classes, and different AL strategies on AL performance. Our results show that AL performance is superior on datasets with sparser classes, lower ceiling performance, fewer classes, and when using uncertainty sampling strategies.},
month = {9},
year = {2024},
title = {Active Learning in Multi-label Classification of Bioacoustic Data},
booktitle = {KI 2024: Advances in Artificial Intelligence. German Conference on Artificial Intelligence (KI-2024), 47th German Conference on AI, September 25-27, Würzburg, Germany},
editor = {Dietmar Seipel and Alexander Steen},
publisher = {Springer, Heidelberg},
author = {Hannes Kath and Thiago Gouvea and Daniel Sonntag},
url = {https://www.dfki.de/fileadmin/user_upload/import/15056_Active_Learning_in_Multi-label_Classification_of_Bioacoustic_Data.pdf}
}
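The "speedup factor" can be read as: how many labelled samples does the AL strategy need, as a fraction of what random sampling needs, to reach the same performance? A toy computation under the assumption of monotone learning curves:

    import numpy as np

    def speedup_factor(al_curve, random_curve, target):
        """Curves map number of labelled samples (index + 1) -> performance."""
        def samples_needed(curve):
            return int(np.searchsorted(curve, target)) + 1
        return samples_needed(al_curve) / samples_needed(random_curve)

    random_curve = np.linspace(0.5, 0.9, 100)      # toy, monotone learning curve
    al_curve = random_curve ** 0.5                 # AL reaches each target earlier
    print(speedup_factor(al_curve, random_curve, target=0.8))  # < 1 means AL wins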
H. Kath, T. Gouvea, and D. Sonntag, "A Human-in-the-Loop Tool for Annotating Passive Acoustic Monitoring Datasets (Extended Abstract)" in Proc. KI 2024: Advances in Artificial Intelligence. German Conference on Artificial Intelligence (KI-2024), 47th German Conference on AI, September 25-27, Würzburg, Germany, 2024.
@inproceedings{pub15057, abstract = {Passive Acoustic Monitoring (PAM) has become a key technology in wildlife monitoring, generating large amounts of acoustic data. However, the effective application of machine learning methods for sound event detection in PAM datasets is highly dependent on the availability of annotated data, which requires a labour-intensive effort to generate. This paper summarises two iterative, human-centred approaches that make efficient use of expert annotation time to accelerate understanding of the data: Combining transfer learning and active learning, we present an annotation tool that selects and annotates the most informative samples one at a time. To annotate multiple samples simultaneously, we present a tool that allows annotation in the embedding space of a variational autoencoder manipulated by a classification head. For both approaches, we provide no-code web applications for intuitive use by domain experts.},
month = {9},
year = {2024},
title = {A Human-in-the-Loop Tool for Annotating Passive Acoustic Monitoring Datasets (Extended Abstract)},
booktitle = {KI 2024: Advances in Artificial Intelligence. German Conference on Artificial Intelligence (KI-2024), 47th German Conference on AI, September 25-27, Würzburg, Germany},
editor = {Dietmar Seipel and Alexander Steen},
publisher = {Springer, Heidelberg},
author = {Hannes Kath and Thiago Gouvea and Daniel Sonntag},
url = {https://www.dfki.de/fileadmin/user_upload/import/15057_Bioacoustic_Annotation_Extended_Abstract.pdf}
}
I. Troshani, T. Gouvea, and D. Sonntag, "Wild Data Treasures: Towards Sustainable Practices in Deep Learning for Wildlife Monitoring" in Proc. CHI EA '24: Extended Abstracts of the 2024 CHI Conference on Human Factors in Computing Systems. CHI Workshop on Sustaining Scalable Sustainability, Human-Centered Green Technology for Community-wide Carbon Reduction, located at ACM CHI 2024, May 11, Honolulu, HI, USA, 2024.
@inproceedings{pub15069, abstract = {While data collection and annotation is crucial for training supervised machine learning models and improving their accuracy, it can be resource-intensive. In this paper, we propose a weakly supervised method to extract fine-grained information from existing weakly-annotated data accumulated over time and alleviate the need for collection and annotation of fresh data. We also integrate it in an interactive tool that facilitates training and annotation. Communities comprising ecologists and other domain experts can use it to train machine learning models to detect animal species and monitor wildlife in protected areas. Our method not only improves the extraction of information from coarse labels but also simplifies the process of annotating new data for experts. By lowering the time and expertise barrier to data annotation, we also aim to encourage individuals with varying levels of expertise to participate more in citizen science and contribute to preserving ecosystems.},
month = {5},
year = {2024},
title = {Wild Data Treasures: Towards Sustainable Practices in Deep Learning for Wildlife Monitoring},
booktitle = {CHI EA '24: Extended Abstracts of the 2024 CHI Conference on Human Factors in Computing Systems. CHI Workshop on Sustaining Scalable Sustainability, Human-Centered Green Technology for Community-wide Carbon Reduction, located at ACM CHI 2024, May 11, Honolulu, HI, USA},
isbn = {979-8-4007-0331-7},
publisher = {ACM, New York, USA},
author = {Ilira Troshani and Thiago Gouvea and Daniel Sonntag},
url = {https://sustainingscalablesustainability.wordpress.com/wp-content/uploads/2024/04/wild-data-treasures.pdf https://www.dfki.de/fileadmin/user_upload/import/15069_wild-data-treasures.pdf}
}
I. Troshani, T. Gouvea, and D. Sonntag, "Leveraging Weakly Supervised and Multiple Instance Learning for Multi-label Classification of Passive Acoustic Monitoring Data" in Proc. KI 2024: Advances in Artificial Intelligence. German Conference on Artificial Intelligence (KI-2024), 47th German Conference on AI, September 25-27, Würzburg, Germany, 2024.
@inproceedings{pub15118, abstract = {Data collection and annotation are time-consuming, resource-intensive processes that often require domain expertise. Existing data collections such as animal sound collections provide valuable data sources, but their utilization is often hindered by the lack of fine-grained labels. In this study, we examine the use of existing weakly supervised methods to extract fine-grained information from existing weakly-annotated data accumulated over time and alleviate the need for collection and annotation of fresh data. We employ TALNet, a Convolutional Recurrent Neural Network (CRNN) model and train it on 60-second sound recordings labeled for the presence of 42 different anuran species and compare it to other models such as BirdNet, a model for detection of bird vocalisation. We conduct the evaluation on 1-second segments, enabling precise sound event localization. Furthermore, we investigate the impact of varying the length of the training input and explore different pooling functions' effects on the model's performance on AnuraSet. Finally, we integrate it in an interactive user interface that facilitates training and annotation. Our findings demonstrate the effectiveness of TALNet and BirdNet in harnessing weakly annotated sound collections for wildlife monitoring. Our method not only improves the extraction of information from coarse labels but also simplifies the process of annotating new data for experts.},
year = {2024},
title = {Leveraging Weakly Supervised and Multiple Instance Learning for Multi-label Classification of Passive Acoustic Monitoring Data},
booktitle = {KI 2024: Advances in Artificial Intelligence. German Conference on Artificial Intelligence (KI-2024), 47th German Conference on AI, September 25-27, Würzburg, Germany},
publisher = {Springer},
author = {Ilira Troshani and Thiago Gouvea and Daniel Sonntag},
url = {https://www.dfki.de/fileadmin/user_upload/import/15118__KI24__WSL4bioacoustics-3.pdf}
}
A. Anagnostopoulou, T. Gouvea, and D. Sonntag, Enhancing Journalism with AI: A Study of Contextualized Image Captioning for News Articles using LLMs and LMMs.
@misc{pub15145, abstract = {Large language models (LLMs) and large multimodal models (LMMs) have significantly impacted the AI community, industry, and various economic sectors. In journalism, integrating AI poses unique challenges and opportunities, particularly in enhancing the quality and efficiency of news reporting. This study explores how LLMs and LMMs can assist journalistic practice by generating contextualised captions for images accompanying news articles. We conducted experiments using the GoodNews dataset to evaluate the ability of LMMs (BLIP-2, GPT-4v, or LLaVA) to incorporate one of two types of context: entire news articles, or extracted named entities. In addition, we compared their performance to a two-stage pipeline composed of a captioning model (BLIP-2, OFA, or ViT-GPT2) with post-hoc contextualisation with LLMs (GPT-4 or LLaMA). We assess a diversity of models, and we find that while the choice of contextualisation model is a significant factor for the two-stage pipelines, this is not the case in the LMMs, where smaller, open-source models perform well compared to proprietary, GPT-powered ones. Additionally, we found that controlling the amount of provided context enhances performance. These results highlight the limitations of a fully automated approach and underscore the necessity for an interactive, human-in-the-loop strategy.},
month = {8},
year = {2024},
title = {Enhancing Journalism with AI: A Study of Contextualized Image Captioning for News Articles using LLMs and LMMs},
howpublished = {Trustworthy Interactive Decision-Making with Foundation Models Workshop @ IJCAI 2024},
author = {Aliki Anagnostopoulou and Thiago Gouvea and Daniel Sonntag},
keywords = {contextualised image captioning, foundation models, large language models, large multimodal models, AI in journalism},
url = {https://openreview.net/forum?id=L6OosgRO0K https://www.dfki.de/fileadmin/user_upload/import/15145_Enhancing_Journalism_with_AI.pdf}
}
N. Feldhus, A. Anagnostopoulou, Q. Wang, M. Alshomary, H. Wachsmuth, D. Sonntag, and S. Möller, "Towards Modeling and Evaluating Instructional Explanations in Teacher-Student Dialogues" in Proc. Proceedings of the 2024 International Conference on Information Technology for Social Good. ACM International Conference on Information Technology for Social Good (GoodIT-2024), September 4-6, Bremen, Germany, 2024.
doi: 10.1145/3677525.3678665
@inproceedings{pub15147, series = {GoodIT '24},
abstract = {For dialogues in which teachers explain difficult concepts to students, didactics research often debates which teaching strategies lead to the best learning outcome. In this paper, we test if LLMs can reliably annotate such explanation dialogues, s.t. they could assist in lesson planning and tutoring systems. We first create a new annotation scheme of teaching acts aligned with contemporary teaching models and re-annotate a dataset of conversational explanations about communicating scientific understanding in teacher-student settings on five levels of the explainee’s expertise: ReWIRED contains three layers of acts (Teaching, Explanation, Dialogue) with increased granularity (span-level). We then evaluate language models on the labeling of such acts and find that the broad range and structure of the proposed labels is hard to model for LLMs such as GPT-3.5/-4 via prompting, but a fine-tuned BERT can perform both act classification and span labeling well. Finally, we operationalize a series of quality metrics for instructional explanations in the form of a test suite, finding that they match the five expertise levels well.},
month = {9},
year = {2024},
title = {Towards Modeling and Evaluating Instructional Explanations in Teacher-Student Dialogues},
booktitle = {Proceedings of the 2024 International Conference on Information Technology for Social Good. ACM International Conference on Information Technology for Social Good (GoodIT-2024), September 4-6, Bremen, Germany},
isbn = {9798400710940},
publisher = {Association for Computing Machinery},
doi = {https://doi.org/10.1145/3677525.3678665},
author = {Nils Feldhus and Aliki Anagnostopoulou and Qianli Wang and Milad Alshomary and Henning Wachsmuth and Daniel Sonntag and Sebastian Möller},
keywords = {Dialogue, Discourse Analysis, Evaluation, Explanations},
url = {https://doi.org/10.1145/3677525.3678665}
}
H. Kath, P. P. Serafini, I. B. Campos, T. Gouvea, and D. Sonntag, "Leveraging transfer learning and active learning for data annotation in passive acoustic monitoring of wildlife" Ecological Informatics, vol. 82.
2024.
doi: 10.1016/j.ecoinf.2024.102710
@article{pub15156, abstract = {Passive Acoustic Monitoring (PAM) has emerged as a pivotal technology for wildlife monitoring, generating vast amounts of acoustic data. However, the successful application of machine learning methods for sound event detection in PAM datasets heavily relies on the availability of annotated data, which can be laborious to acquire. In this study, we investigate the effectiveness of transfer learning and active learning techniques to address the data annotation challenge in PAM. Transfer learning allows us to use pre-trained models from related tasks or datasets to bootstrap the learning process for sound event detection. Furthermore, active learning promises strategic selection of the most informative samples for annotation, effectively reducing the annotation cost and improving model performance. We evaluate an approach that combines transfer learning and active learning to efficiently exploit existing annotated data and optimize the annotation process for PAM datasets. Our transfer learning observations show that embeddings produced by BirdNet, a model trained on high signal-to-noise recordings of bird vocalisations, can be effectively used for predicting anurans in PAM data: a linear classifier constructed using these embeddings outperforms the benchmark by 21.7%. Our results indicate that active learning is superior to random sampling, although no clear winner emerges among the strategies employed. The proposed method holds promise for facilitating broader adoption of machine learning techniques in PAM and advancing our understanding of biodiversity dynamics through acoustic data analysis.},
year = {2024},
title = {Leveraging transfer learning and active learning for data annotation in passive acoustic monitoring of wildlife},
journal = {Ecological Informatics},
volume = {82},
pages = {1-9},
publisher = {Elsevier},
doi = {https://doi.org/10.1016/j.ecoinf.2024.102710},
author = {Hannes Kath and Patricia P. Serafini and Ivan B. Campos and Thiago Gouvea and Daniel Sonntag},
keywords = {Passive acoustic monitoring, Active learning, Transfer learning, BirdNet},
url = {https://www.sciencedirect.com/science/article/pii/S1574954124002528 https://www.dfki.de/fileadmin/user_upload/import/15156_Kath_et_al_2024_Leveraging_transfer_learning_and_active_learning_for_data_annotation_in_passive.pdf}
}
H. Kath, T. Gouvea, and D. Sonntag, "Active and Transfer Learning for Efficient Identification of Species in Multi-Label Bioacoustic Datasets" in Proc. Proceedings of the 2024 International Conference on Information Technology for Social Good. ACM International Conference on Information Technology for Social Good (GoodIT-2024), September 4-6, Bremen, Germany, 2024.
doi: 10.1145/3677525.3678635
@inproceedings{pub15162, abstract = {The complex system of life on Earth, biodiversity, provides the essential resources for human survival. However, humanity is the primary driver of species extinction, accelerating the extinction rate to 100-1000 times higher than pre-industrial times. To combat this alarming trend, detailed information on biodiversity is needed, making effective monitoring technologies essential. Passive Acoustic Monitoring (PAM) has emerged as a key technology for scalable wildlife monitoring. While PAM is effectively used to record vast amounts of acoustic data, the automatic identification of species remains an unsolved challenge. This elaboration formally describes the problem of identifying as many species as possible in an unlabelled PAM dataset while examining the fewest samples. A pilot study is conducted to investigate the potential of four approaches combining transfer learning with adapted uncertainty or diversity active learning sampling strategies. The findings of this study indicate that uncertainty-based sampling strategies yield superior performance to random sampling. In contrast, the diversity-based strategies used demonstrate inferior performance, with improvements observed when fine-tuning the embedding space using already labelled data. This study lays the groundwork for future research aimed at iteratively fine-tuning the embedding space in combination with uncertainty and diversity methods.},
year = {2024},
title = {Active and Transfer Learning for Efficient Identification of Species in Multi-Label Bioacoustic Datasets},
booktitle = {Proceedings of the 2024 International Conference on Information Technology for Social Good. ACM International Conference on Information Technology for Social Good (GoodIT-2024), September 4-6, Bremen, Germany},
pages = {22-25},
isbn = {9798400710940},
publisher = {ACM},
doi = {https://doi.org/10.1145/3677525.3678635},
author = {Hannes Kath and Thiago Gouvea and Daniel Sonntag},
url = {https://dl.acm.org/doi/10.1145/3677525.3678635}
}
D. Gurgurov, M. Hartmann, and S. Ostermann, "Adapting Multilingual LLMs to Low-Resource Languages with Knowledge Graphs via Adapters" in Proc. Proceedings of the 1st Workshop on Knowledge Graphs and Large Language Models (KaLLM 2024). Workshop on Knowledge Graphs and Large Language Models (KaLLM-2024), August 15, Bangkok, Thailand, 2024.
@inproceedings{pub15196, abstract = {This paper explores the integration of graph knowledge from linguistic ontologies into multilingual Large Language Models (LLMs) using adapters to improve performance for low-resource languages (LRLs) in sentiment analysis (SA) and named entity recognition (NER). Building upon successful parameter-efficient fine-tuning techniques, such as K-ADAPTER and MAD-X, we propose a similar approach for incorporating knowledge from multilingual graphs, connecting concepts in various languages with each other through linguistic relationships, into multilingual LLMs for LRLs. Specifically, we focus on eight LRLs --- Maltese, Bulgarian, Indonesian, Nepali, Javanese, Uyghur, Tibetan, and Sinhala --- and employ language-specific adapters fine-tuned on data extracted from the language-specific section of ConceptNet, aiming to enable knowledge transfer across the languages covered by the knowledge graph. We compare various fine-tuning objectives, including standard Masked Language Modeling (MLM), MLM with full-word masking, and MLM with targeted masking, to analyze their effectiveness in learning and integrating the extracted graph data. Through empirical evaluation on language-specific tasks, we assess how structured graph knowledge affects the performance of multilingual LLMs for LRLs in SA and NER, providing insights into the potential benefits of adapting language models for low-resource scenarios.},
year = {2024},
title = {Adapting Multilingual LLMs to Low-Resource Languages with Knowledge Graphs via Adapters},
booktitle = {Proceedings of the 1st Workshop on Knowledge Graphs and Large Language Models (KaLLM 2024). Workshop on Knowledge Graphs and Large Language Models (KaLLM-2024), August 15, Bangkok, Thailand},
editor = {Russa Biswas and Lucie-Aimée Kaffee and Oshin Agarwal and Pasquale Minervini and Sameer Singh and Gerard de Melo},
pages = {63-74},
publisher = {Association for Computational Linguistics},
author = {Daniil Gurgurov and Mareike Hartmann and Simon Ostermann},
url = {https://aclanthology.org/2024.kallm-1.7 https://www.dfki.de/fileadmin/user_upload/import/15196_2024.kallm-1.7.pdf}
}
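Two of the compared masking objectives differ only in what counts as a maskable unit. A toy contrast on WordPiece-style tokens (subword handling in the actual models and the adapter layers themselves are out of scope here):

    import random

    random.seed(0)
    subwords = ["Concept", "##Net", "links", "Malte", "##se", "concepts"]

    def standard_mlm(toks, p=0.4):
        # masks individual subword pieces independently of word boundaries
        return [("[MASK]" if random.random() < p else t) for t in toks]

    def full_word_mlm(toks, p=0.4):
        # groups '##' continuation pieces with their head token, masks whole words
        out, i = [], 0
        while i < len(toks):
            j = i + 1
            while j < len(toks) and toks[j].startswith("##"):
                j += 1
            out += ["[MASK]"] * (j - i) if random.random() < p else toks[i:j]
            i = j
        return out

    print(standard_mlm(subwords))
    print(full_word_mlm(subwords))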
M. A. Kadir, H. M. T. Alam, D. Srivastav, H. Profitlich, and D. Sonntag, "Partial Image Active Annotation (PIAA): An Efficient Active Learning Technique Using Edge Information in Limited Data Scenarios" KI - Künstliche Intelligenz, German Journal on Artificial Intelligence - Organ des Fachbereiches "Künstliche Intelligenz" der Gesellschaft für Informatik e.V. (KI), vol. -.
2024.
doi: 10.1007/s13218-024-00849-6
@article{pub15260, abstract = {Active learning (AL) algorithms are increasingly being used to train models with limited data for annotation tasks. However, the selection of data for AL is a complex issue due to the restricted information on unseen data. To tackle this problem, a technique we refer to as Partial Image Active Annotation (PIAA) employs the edge information of unseen images as prior knowledge to gauge uncertainty. This uncertainty is determined by examining the divergence and entropy in model predictions across edges. The resulting measure is then applied to choose superpixels from input images for active annotation. We demonstrate the effectiveness of PIAA in multi-class Optical Coherence Tomography (OCT) segmentation tasks, attaining a Dice score comparable to state-of-the-art OCT segmentation algorithms trained with extensive annotated data. Concurrently, we successfully reduce annotation label costs to 12%, 2.3%, and 3%, respectively, across three publicly accessible datasets (Duke, AROI, and UMN).},
month = {6},
year = {2024},
title = {Partial Image Active Annotation (PIAA): An Efficient Active Learning Technique Using Edge Information in Limited Data Scenarios},
journal = {KI - Künstliche Intelligenz, German Journal on Artificial Intelligence - Organ des Fachbereiches "Künstliche Intelligenz" der Gesellschaft für Informatik e.V. (KI)},
volume = {-},
pages = {1-12},
publisher = {Springer},
doi = {https://doi.org/10.1007/s13218-024-00849-6},
author = {Md Abdul Kadir and Hasan Md Tusfiqur Alam and Devansh Srivastav and Hans-Jürgen Profitlich and Daniel Sonntag},
url = {https://link.springer.com/article/10.1007/s13218-024-00849-6}
}
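A hedged sketch of the underlying idea: concentrate annotation on regions where the model is uncertain along predicted edges, then pick the superpixels with the highest aggregated uncertainty. Grid cells stand in for superpixels, and the paper's exact divergence measure is not reproduced:

    import numpy as np

    def binary_entropy(p, eps=1e-9):
        return -(p * np.log(p + eps) + (1 - p) * np.log(1 - p + eps))

    rng = np.random.default_rng(0)
    probs = rng.random((128, 128))                  # per-pixel foreground probability
    edge_map = np.abs(np.gradient(probs)[0]) > 0.3  # crude stand-in for an edge prior

    unc = binary_entropy(probs) * edge_map          # uncertainty restricted to edges
    cells = unc.reshape(8, 16, 8, 16).mean(axis=(1, 3))  # 8x8 grid of "superpixels"
    order = np.argsort(cells, axis=None)[::-1]           # most uncertain first
    print("annotate cells:", np.column_stack(np.unravel_index(order, cells.shape))[:5])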
M. A. Kadir, G. Addluri, and D. Sonntag, Revealing Vulnerabilities of Neural Networks in Parameter Learning and Defense Against Explanation-Aware Backdoors.
@misc{pub15261, abstract = {Explainable Artificial Intelligence (XAI) strategies play a crucial part in increasing the understanding and trustworthiness of neural networks. Nonetheless, these techniques could potentially generate misleading explanations. Blinding attacks can drastically alter a machine learning algorithm's prediction and explanation, providing misleading information by adding visually unnoticeable artifacts into the input, while maintaining the model's accuracy. This poses a serious challenge to the reliability of XAI methods. To address it, we leverage statistical analysis to highlight the changes in CNN weights following blinding attacks. We introduce a method specifically designed to limit the effectiveness of such attacks during the evaluation phase, avoiding the need for extra training. The method we suggest defends against most modern explanation-aware adversarial attacks, achieving an approximate decrease of ~99\% in the Attack Success Rate (ASR) and a ~91\% reduction in the Mean Square Error (MSE) between the original explanation and the defended (post-attack) explanation across three unique types of attacks.},
month = {3},
year = {2024},
title = {Revealing Vulnerabilities of Neural Networks in Parameter Learning and Defense Against Explanation-Aware Backdoors},
author = {Md Abdul Kadir and Gowthamkrishna Addluri and Daniel Sonntag},
status_notes = {preprint},
url = {https://arxiv.org/abs/2403.16569 https://www.dfki.de/fileadmin/user_upload/import/15261_2403.16569v1.pdf}
}
K. Kuznetsov, S. Bittner, A. M. Selim, M. Barz, and D. Sonntag, "FormTwin: A Framework for Pen-based Data Collection" in Proc. Adjunct Proceedings of the 32nd ACM Conference on User Modeling, Adaptation and Personalization. International Conference on User Modeling, Adaptation, and Personalization (UMAP-2024), July 1-4, Cagliari, Italy, 2024.
doi: 10.1145/3631700.3664875
@inproceedings{pub15331, abstract = {Paper and digital forms are widely used to collect user information across multiple domains, such as research, healthcare, and education. However, both types still lack application and follow-up interpretation: Paper forms need to be digitised meticulously to be analyzed or shared with team members efficiently; in comparison, digital forms cannot convey handwriting and might require technical literacy. We present the FormTwin data collection tool—an alternative to online forms, which allows for the efficient reuse of existing paper forms while providing the convenience of digital forms. FormTwin can digitise a wide range of forms with the integrated form annotation application. Then, it combines two input channels: A stylus on a tablet and a digital smart pen on plain paper, which duplicates the input on a mobile application in real-time. We aim to improve the efficiency and accessibility of data collection for practitioners with a modular system that combines the accessibility of digital forms with a low technical-literacy barrier while retaining the quality of hand-drawn sketches.},
month = {7},
year = {2024},
title = {FormTwin: A Framework for Pen-based Data Collection},
booktitle = {Adjunct Proceedings of the 32nd ACM Conference on User Modeling, Adaptation and Personalization. International Conference on User Modeling, Adaptation, and Personalization (UMAP-2024), July 1-4, Cagliari, Italy},
pages = {132-135},
isbn = {979-8-4007-0466-6/24/07},
address = {1601 Broadway, 10th Floor New York, NY 10019-7434},
publisher = {ACM Digital Library},
doi = {https://doi.org/10.1145/3631700.3664875},
author = {Konstantin Kuznetsov and Sara-Jane Bittner and Abdulrahman Mohamed Selim and Michael Barz and Daniel Sonntag},
keywords = {Digital Pen, Digital Forms, Data Collection Methods, Research Tools, Intelligent User Interfaces},
organization = {Association for Computing Machinery (ACM)},
url = {https://doi.org/10.1145/3631700.3664875 https://www.dfki.de/fileadmin/user_upload/import/15331_3631700.3664875.pdf}
}
D. Sonntag, T. Gouvea, M. Barz, A. Anagnostopoulou, S. Liang, S. Bittner, and F. Scheurer, "The Interactive Deep Learning Enterprise (No-IDLE) meets ChatGPT" German Research Center for AI, 2024.
@techreport{pub15382, series = {DFKI Technical Report},
abstract = {This DFKI technical report presents the anatomy of the No-IDLE meets ChatGPT prototype system (funded by the German Federal Ministry of Education and Research) that provides not only basic and fundamental research in interactive machine learning, but also reveals deeper insights into how to leverage the opportunities arising from large language models and technologies for the No-IDLE project. No-IDLE’s goals and scientific challenges centre around the desire to increase the reach of interactive deep learning solutions for non-experts in machine learning. No-IDLE aims to enhance the interaction between humans and machines for the purpose of updating deep learning models, integrating cutting-edge human-computer interaction techniques and advanced deep learning approaches. Considering the recent advances in LLMs and their multimodal capabilities, the overall objective of "No-IDLE meets ChatGPT" should be well motivated. One of the key innovations described in this technical report is a methodology including benchmark studies for interactive machine learning combined with LLMs which will become central when we start interacting with semi-intelligent machines based on optimisation methods like automatic prompt engineering or natural language inference. Our main research question is how ChatGPT and other variants can help improve the accuracy of (semi-) automatic subtasks in image retrieval, captioning, and person/scene recognition.},
year = {2024},
title = {The Interactive Deep Learning Enterprise (No-IDLE) meets ChatGPT},
volume = {-},
institution = {German Research Center for AI},
author = {Daniel Sonntag and Thiago Gouvea and Michael Barz and Aliki Anagnostopoulou and Siting Liang and Sara-Jane Bittner and Franziska Scheurer},
url = {https://www.dfki.de/fileadmin/user_upload/import/15382_No_IDLE_meets_ChatGPT_Technical_Report.pdf}
}
O. S. Bhatti, H. Sriram, A. M. Selim, C. Conati, M. Barz, and D. Sonntag, "Detecting when Users Disagree with Generated Captions" in Proc. Companion Proceedings of the 26th International Conference on Multimodal Interaction. ACM International Conference on Multimodal Interaction (ICMI-2024), November 4, San José, Costa Rica, 2024.
doi: 10.1145/3686215.3688382
@inproceedings{pub15402, series = {ICMI Companion '24},
abstract = {The pervasive integration of artificial intelligence (AI) into daily life has led to a growing interest in AI agents that can learn continuously. Interactive Machine Learning (IML) has emerged as a promising approach to meet this need, essentially involving human experts in the model training process, often through iterative user feedback. However, repeated feedback requests can lead to frustration and reduced trust in the system. Hence, there is increasing interest in refining how these systems interact with users to ensure efficiency without compromising user experience. Our research investigates the potential of eye tracking data as an implicit feedback mechanism to detect user disagreement with AI-generated captions in image captioning systems. We conducted a study with 30 participants using a simulated captioning interface and gathered their eye movement data as they assessed caption accuracy. The goal of the study was to determine whether eye tracking data can predict user agreement or disagreement effectively, thereby strengthening IML frameworks. Our findings reveal that, while eye tracking shows promise as a valuable feedback source, ensuring consistent and reliable model performance across diverse users remains a challenge.},
year = {2024},
title = {Detecting when Users Disagree with Generated Captions},
booktitle = {Companion Proceedings of the 26th International Conference on Multimodal Interaction. ACM International Conference on Multimodal Interaction (ICMI-2024), November 4, San José, Costa Rica},
note = {HumanEYEze Workshop},
pages = {195-203},
isbn = {9798400704635},
publisher = {Association for Computing Machinery, New York, NY, USA},
doi = {https://doi.org/10.1145/3686215.3688382},
author = {Omair Shahzad Bhatti and Harshinee Sriram and Abdulrahman Mohamed Selim and Cristina Conati and Michael Barz and Daniel Sonntag},
keywords = {disagreement detection, emotion detection, eye tracking, gaze, interactive machine learning, user disagreement},
url = {https://doi.org/10.1145/3686215.3688382 https://www.dfki.de/fileadmin/user_upload/import/15402_3686215.3688382.pdf}
}
M. Barz, R. Bednarik, A. Bulling, C. Conati, and D. Sonntag, "HumanEYEze 2024: Workshop on Eye Tracking for Multimodal Human-Centric Computing" in Proc. Proceedings of the 26th International Conference on Multimodal Interaction. ACM International Conference on Multimodal Interaction (ICMI-2024), November 4-8, San José, Costa Rica, 2024.
doi: 10.1145/3678957.3688384
@inproceedings{pub15403, series = {ICMI '24},
abstract = {The HumanEYEze 2024 workshop aims to explore the role of eye tracking in developing human-centered multimodal AI systems. Over the past two decades, eye tracking has evolved from a diagnostic tool to an important input modality for real-time interactive systems, driven by advancements in hardware that have improved its affordability, availability, and performance. Initially used in specialized applications, eye tracking now significantly impacts research on gaze-based multimodal interaction. Recently, eye-based user and context modeling has emerged, utilizing eye movements to provide rich insights into user behavior and interaction contexts. The workshop aims to bring together researchers from eye tracking, multimodal human-computer interaction, and AI. It aims to enhance understanding of integrating eye tracking into multimodal human-centered computing. The expected outcomes include fostering collaborations and promoting knowledge exchange.},
year = {2024},
title = {HumanEYEze 2024: Workshop on Eye Tracking for Multimodal Human-Centric Computing},
booktitle = {Proceedings of the 26th International Conference on Multimodal Interaction. ACM International Conference on Multimodal Interaction (ICMI-2024), November 4-8, San José, Costa Rica},
isbn = {9798400704628},
publisher = {Association for Computing Machinery, New York, NY, USA},
doi = {https://doi.org/10.1145/3678957.3688384},
author = {Michael Barz and Roman Bednarik and Andreas Bulling and Cristina Conati and Daniel Sonntag},
keywords = {Eye Tracking, Gaze, Human-centered AI, Human-centric Computing, Multimodal Interaction, User Modeling, Workshop},
url = {https://doi.org/10.1145/3678957.3688384 https://www.dfki.de/fileadmin/user_upload/import/15403_humaneyeze.pdf}
}
A. M. Selim, O. S. Bhatti, M. Barz, and D. Sonntag, "Perceived Text Relevance Estimation Using Scanpaths and GNNs" in Proc. Proceedings of the 26th International Conference on Multimodal Interaction. ACM International Conference on Multimodal Interaction (ICMI-2024), November 4-8, San Jose, Costa Rica, 2024.
doi: 10.1145/3678957.3685736
@inproceedings{pub15407, series = {ICMI '24},
abstract = {A scanpath is an important concept in eye tracking that represents a person’s eye movements in a graph-like structure. Passive gaze-based interfaces, in which users do not consciously interact using their eyes, typically interpret users’ scanpaths to enable adaptive and personalised interaction. Despite the benefits of graph neural networks (GNNs) in graph processing, this technology has not been considered for that purpose. An example application is perceived relevance estimation, which still suffers from low classification performance. In this work, we investigate how and whether GNNs can be used to analyse scanpaths for readers’ perceived relevance estimation using the gazeRE dataset. This dataset contains eye tracking data from 24 participants, who rated the relevance of 12 short and 12 long documents in relation to a given query. The relevance was assigned either to an entire short document or to each paragraph within a long document, which allowed us to investigate two different GNN tasks. For comparison, we reproduced the gazeRE baseline using Random Forest and Support Vector classifiers, and an additional Convolutional Neural Network (CNN) from the literature. All models were evaluated using leave-users-out cross-validation. For short documents, the GNNs surpassed the baseline methods, with certain experiments showing an absolute balanced accuracy improvement of 7.6% and 14.3% over the CNN and gazeRE baselines, respectively. However, similar improvements were not observed in long documents. This work investigates and discusses the future potential of using GNNs as a scanpath analysis method for passive gaze-based applications, such as implicit relevance estimation.},
month = {11},
year = {2024},
title = {Perceived Text Relevance Estimation Using Scanpaths and GNNs},
booktitle = {Proceedings of the 26th International Conference on Multimodal Interaction. ACM International Conference on Multimodal Interaction (ICMI-2024), November 4-8, San Jose, Costa Rica},
pages = {418-427},
isbn = {9798400704628},
publisher = {Association for Computing Machinery, New York, NY, USA},
doi = {https://doi.org/10.1145/3678957.3685736},
author = {Abdulrahman Mohamed Selim and Omair Shahzad Bhatti and Michael Barz and Daniel Sonntag},
keywords = {Eye Tracking, GNN, Passive Gaze-based Application, Scanpath},
url = {https://doi.org/10.1145/3678957.3685736 https://www.dfki.de/fileadmin/user_upload/import/15407_3678957.3685736.pdf}
}
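A deliberately small PyTorch sketch of the core idea, not the paper's architecture: fixations become nodes with (x, y, duration) features, consecutive fixations are linked by saccade edges, and pooled node embeddings feed a relevance classifier:

    import torch
    import torch.nn as nn

    def scanpath_graph(fixations):
        """fixations: (N, 3) tensor of (x, y, duration); edges follow reading order."""
        n = fixations.shape[0]
        adj = torch.eye(n)
        for i in range(n - 1):                 # saccade i -> i+1, made symmetric
            adj[i, i + 1] = adj[i + 1, i] = 1.0
        return adj / adj.sum(1, keepdim=True)  # row-normalised adjacency

    class ScanpathGNN(nn.Module):
        def __init__(self, dim=3, hidden=16):
            super().__init__()
            self.lin1, self.lin2 = nn.Linear(dim, hidden), nn.Linear(hidden, 2)

        def forward(self, x, adj):
            h = torch.relu(adj @ self.lin1(x))   # one round of message passing
            return self.lin2((adj @ h).mean(0))  # mean-pool nodes -> 2 classes

    fix = torch.rand(12, 3)                      # a 12-fixation scanpath
    model = ScanpathGNN()
    print(model(fix, scanpath_graph(fix)))       # logits: relevant vs. not relevant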
L. Kopácsi, A. Klimenko, M. Barz, and D. Sonntag, "Exploring Gaze-Based Menu Navigation in Virtual Environments" in Proc. Proceedings of the 2024 ACM Symposium on Spatial User Interaction (SUI '24). ACM Symposium on Spatial User Interaction (SUI-2024), October 7-8, Trier, Germany, 2024.
doi: 10.1145/3677386.3688887
@inproceedings{pub15412, abstract = {With the integration of eye tracking technologies in Augmented Reality (AR) and Virtual Reality (VR) headsets, gaze-based interactions have opened up new possibilities for user interface design, including menu navigation. Prior research in gaze-based menu navigation in VR has predominantly focused on pie menus, yet recent studies indicate a user preference for list layouts. However, the comparison of gaze-based interactions on list menus is lacking in the literature. This work aims to fill this gap by exploring the viability of list menus for multi-level gaze-based menu navigation in VR and evaluating the efficiency of various gaze-based interactions, such as dwelling and border-crossing, against traditional controller navigation and multi-modal interaction using gaze and button press.},
number = {40},
month = {10},
year = {2024},
title = {Exploring Gaze-Based Menu Navigation in Virtual Environments},
booktitle = {Proceedings of the 2024 ACM Symposium on Spatial User Interaction (SUI '24). ACM Symposium on Spatial User Interaction (SUI-2024), October 7-8, Trier, Germany},
pages = {1-2},
isbn = {9798400710889},
publisher = {Association for Computing Machinery, New York, NY, USA},
doi = {https://doi.org/10.1145/3677386.3688887},
author = {László Kopácsi and Albert Klimenko and Michael Barz and Daniel Sonntag},
keywords = {Extended Reality (XR), Eye Tracking, Gaze-based Interaction, Menu Navigation},
url = {https://dl.acm.org/doi/10.1145/3677386.3688887 https://www.dfki.de/fileadmin/user_upload/import/15412_3677386.3688887.pdf}
}
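Dwelling, one of the gaze interactions compared above, reduces to a small amount of state; the 500 ms threshold below is an illustrative assumption, not a value from the paper:

    DWELL_MS = 500

    def dwell_select(gaze_samples):
        """gaze_samples: iterable of (timestamp_ms, menu_item_id or None)."""
        current, since = None, None
        for t, item in gaze_samples:
            if item != current:                 # gaze moved to another item
                current, since = item, t
            elif item is not None and t - since >= DWELL_MS:
                return item                     # dwell threshold reached
        return None

    stream = [(0, "open"), (100, "open"), (300, "save"), (400, "save"), (900, "save")]
    print(dwell_select(stream))  # -> "save"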
L. Kopácsi, P. Karagiannis, S. Makris, J. Kildal, A. Rivera-Pinto, J. R. de Munain, J. Rosel, M. Madarieta, N. Tseregkounis, K. Salagianni, P. Aivaliotis, M. Barz, and D. Sonntag, "The MASTER XR Platform for Robotics Training in Manufacturing" in Proc. Proceedings of the 30th ACM Symposium on Virtual Reality Software and Technology (VRST '24). ACM Symposium on Virtual Reality Software and Technology (VRST-2024), October 9-11, Trier, Germany, 2024.
doi: 10.1145/3641825.3689514
@inproceedings{pub15413, abstract = {The MASTER project introduces an open Extended Reality (XR) platform designed to enhance human-robot collaboration and train workers in robotics within manufacturing settings. It includes modules for creating safe workspaces, intuitive robot programming, and user-friendly human-robot interactions (HRI), including eye-tracking technologies. The development of the platform is supported by two open calls targeting technical SMEs and educational institutes to enhance and test its functionalities. By employing the learning-by-doing methodology and integrating effective teaching principles, the MASTER platform aims to provide a comprehensive learning environment, preparing students and professionals for the complexities of flexible and collaborative manufacturing settings.},
number = {67},
month = {10},
year = {2024},
title = {The MASTER XR Platform for Robotics Training in Manufacturing},
booktitle = {Proceedings of the 30th ACM Symposium on Virtual Reality Software and Technology (VRST '24). ACM Symposium on Virtual Reality Software and Technology (VRST-2024), October 9-11, Trier, Germany},
pages = {1-2},
isbn = {9798400705359},
publisher = {Association for Computing Machinery, New York, NY, USA},
doi = {https://doi.org/10.1145/3641825.3689514},
author = {László Kopácsi and Panagiotis Karagiannis and Sotiris Makris and Johan Kildal and Andoni Rivera-Pinto and Judit Ruiz de Munain and Jesús Rosel and Maria Madarieta and Nikolaos Tseregkounis and Konstantina Salagianni and Panagiotis Aivaliotis and Michael Barz and Daniel Sonntag},
keywords = {Extended Reality (XR), Eye Tracking, Human-Robot Collaboration, Industry 4.0, Manufacturing, Robotics, Worker Training},
url = {https://doi.org/10.1145/3641825.3689514 https://www.dfki.de/fileadmin/user_upload/import/15413_3641825.3689514.pdf}
}
L. Kopácsi, T. S. Schneider, C. Karr, M. Barz, and D. Sonntag, "GazeLock: Gaze- and Lock Pattern-Based Authentication" in Proceedings of the 30th ACM Symposium on Virtual Reality Software and Technology (VRST '24). ACM Symposium on Virtual Reality Software and Technology (VRST-2024), October 9-11, Trier, Germany, 2024.
doi: 10.1145/3641825.3689520
@inproceedings{pub15414, abstract = {Password entry is a common authentication approach in Extended Reality (XR) applications for its simplicity and familiarity, but it faces challenges in public and dynamic environments due to its cumbersome nature and susceptibility to observation attacks. Manual password input can be disruptive and prone to theft through shoulder surfing or surveillance. While alternative knowledge-based approaches exist, they often require complex physical gestures and are impractical for frequent public use. We present GazeLock, an eye-tracking and lock pattern-based authentication method. This method aims to provide an easy-to-learn and efficient alternative by leveraging familiar lock patterns operated through gaze. It ensures resilience to external observation, as physical interaction is unnecessary and eyes are obscured by the headset. Its hands-free, discreet nature makes it suitable for secure public use. We demonstrate this method by simulating the unlocking of a smart lock via an XR headset, showcasing its potential applications and benefits in real-world scenarios.},
number = {94},
month = {10},
year = {2024},
title = {GazeLock: Gaze- and Lock Pattern-Based Authentication},
booktitle = {Proceedings of the 30th ACM Symposium on Virtual Reality Software and Technology (VRST '24). ACM Symposium on Virtual Reality Software and Technology (VRST-2024), October 9-11, Trier, Germany},
pages = {1-2},
isbn = {9798400705359},
publisher = {Association for Computing Machinery, New York, NY, USA},
doi = {https://doi.org/10.1145/3641825.3689520},
author = {László Kopácsi and Tobias Sebastian Schneider and Chiara Karr and Michael Barz and Daniel Sonntag},
keywords = {Authentication, Extended Reality (XR), Eye Tracking, Gaze-based Interaction},
url = {https://doi.org/10.1145/3641825.3689520 https://www.dfki.de/fileadmin/user_upload/import/15414_3641825.3689520.pdf}
}
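A minimal sketch of the core matching step behind a gaze-operated lock pattern, assuming normalised gaze coordinates and a 3x3 grid; GazeLock's actual headset pipeline (calibration, fixation detection, feedback) is not reproduced here.

# Sketch of lock-pattern matching over gaze fixations on a 3x3 grid
# (hypothetical geometry and enrolment; illustrative only).
def cell_of(x, y, grid=3):
    """Map a normalised gaze point (0..1, 0..1) to a grid cell index."""
    col = min(int(x * grid), grid - 1)
    row = min(int(y * grid), grid - 1)
    return row * grid + col

def pattern_from_fixations(fixations):
    """Collapse consecutive fixations in the same cell into one pattern step."""
    pattern = []
    for x, y in fixations:
        c = cell_of(x, y)
        if not pattern or pattern[-1] != c:
            pattern.append(c)
    return pattern

stored = [0, 1, 2, 5, 8]           # enrolled pattern: top row, then down
entered = pattern_from_fixations([(0.1, 0.1), (0.5, 0.15), (0.9, 0.1),
                                  (0.9, 0.5), (0.9, 0.9)])
print("unlocked" if entered == stored else "rejected")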
H. Kath, I. Troshani, B. Lüers, T. Gouvea, and D. Sonntag, "Enhancing Biodiversity Monitoring: An Interactive Tool for Efficient Identification of Species in Large Bioacoustics Datasets" in ICMI Companion '24: Companion Proceedings of the 26th International Conference on Multimodal Interaction. ACM International Conference on Multimodal Interaction (ICMI-2024), 26th International Conference on Multimodal Interaction, November 4-8, Costa Rica, 2024.
doi: 10.1145/3686215.3688374
@inproceedings{pub15416, abstract = {Biodiversity loss is a major challenge for humanity, which has increased the rate of species extinction by a factor of 100-1000 compared to pre-industrial times. XPRIZE Rainforest is a competition focused on developing a pipeline for real-time biodiversity measurement: teams have 24 hours to collect data and another 48 hours to produce a list of species present in the data. Passive acoustic monitoring (PAM) is a scalable technology for data acquisition in wildlife monitoring. However, analyzing large PAM datasets poses a significant challenge. This paper presents a tool used by the Brazilian team during the XPRIZE Rainforest finals. Using a combination of audio separation, weakly supervised learning, transfer learning, active learning, multiple-instance learning, and novel class detection, samples are carefully selected and presented to the user for annotation.},
year = {2024},
title = {Enhancing Biodiversity Monitoring: An Interactive Tool for Efficient Identification of Species in Large Bioacoustics Datasets},
booktitle = {ICMI Companion '24: Companion Proceedings of the 26th International Conference on Multimodal Interaction. ACM International Conference on Multimodal Interaction (ICMI-2024), 26th International Conference on Multimodal Interaction, November 4-8, Costa Rica},
pages = {91-93},
publisher = {Association for Computing Machinery},
doi = {https://doi.org/10.1145/3686215.3688374},
author = {Hannes Kath and Ilira Troshani and Bengt Lüers and Thiago Gouvea and Daniel Sonntag},
keywords = {passive acoustic monitoring, novel class detection, transfer learning, active learning},
url = {https://www.dfki.de/fileadmin/user_upload/import/15416_ICMI_Demo_Paper.pdf}
}
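The sample-selection loop at the heart of such annotation tools can be sketched as entropy-based uncertainty sampling; the model outputs and batch size k below are stand-ins, and the paper combines this with audio separation, weak supervision, multiple-instance learning, and novel-class detection.

# Minimal active-learning selection step: rank unlabelled clips by model
# uncertainty (entropy) and present the top candidates for annotation.
import numpy as np

def entropy(probs, eps=1e-12):
    """Shannon entropy per row of an (n_samples, n_classes) probability array."""
    p = np.clip(probs, eps, 1.0)
    return -(p * np.log(p)).sum(axis=1)

def select_for_annotation(probs, k=10):
    """Indices of the k most uncertain samples."""
    return np.argsort(-entropy(probs))[:k]

probs = np.random.dirichlet(np.ones(5), size=100)  # stand-in model outputs
print(select_for_annotation(probs, k=3))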
H. M. D. Nguyen, H. M. T. Alam, T. Nguyen, D. Srivastav, H. Profitlich, N. Le, and D. Sonntag, "Deep Learning for Ophthalmology - The State-of-the-Art and Future Trends" DFKI, 2024.
@techreport{pub15459, series = {DFKI Research Reports, RR},
abstract = {The emergence of artificial intelligence (AI), particularly Deep Learning (DL), has marked a new era in the realm of ophthalmology, offering the transformative potential for the diagnosis and treatment of posterior segment eye diseases. This review explores the cutting-edge applications of DL across a range of ocular conditions, including diabetic retinopathy, glaucoma, age-related macular degeneration, and retinal vessel segmentation. We provide a comprehensive overview of foundational machine learning techniques and advanced DL architectures, such as convolutional neural networks, attention mechanisms, and transformer-based models, highlighting the evolving role of AI in enhancing diagnostic accuracy, optimizing treatment strategies, and improving overall patient care. Additionally, we present key challenges in integrating AI solutions into clinical practice, including ensuring data diversity, improving algorithm transparency, and effectively leveraging multimodal data. This review emphasizes AI's potential to improve disease diagnosis and enhance patient care while stressing the importance of collaborative efforts to overcome these barriers and fully harness AI's impact in advancing eye care.},
year = {2024},
title = {Deep Learning for Ophthalmology - The State-of-the-Art and Future Trends},
volume = {01},
institution = {DFKI},
author = {Ho Minh Duy Nguyen and Hasan Md Tusfiqur Alam and Tai Nguyen and Devansh Srivastav and Hans-Jürgen Profitlich and Ngan Le and Daniel Sonntag},
url = {https://www.dfki.de/fileadmin/user_upload/import/15459_2501.04073v1.pdf}
}
D. Srivastav, T. Kaltbach, A. A. Mughal, N. Giriyan, M. B. Younus, T. Jungbluth, J. Britz, J. Alexandersson, and M. Rekrut, "Mindful Mobility: EEG-Based Brain-Computer Interaction for Elevator Control Using Muse Headset" in Proceedings of the International Conference on Ubiquitous Computing and Ambient Intelligence (UCAmI 2024). International Conference on Ubiquitous Computing and Ambient Intelligence (UCAmI-2024), November 27-30, Ulster University, Belfast, United Kingdom, 2024.
@inproceedings{pub15474, abstract = {Brain-Computer Interface (BCI) systems represent an innovative approach to human-computer interaction, enabling users to control devices and interact with technology solely through brain activity. This study investigates the feasibility and potential of non-invasive EEG-based BCI for elevator control, addressing two primary research questions: 1) Can a person reliably control an elevator through a BCI system? and 2) What are the usability and user experience outcomes of such a system? We integrated a Muse headset with a remote-controllable elevator system using an iPhone as the interface over a local network. This setup allowed users to operate the elevator using blinking, jaw clenching, and mental focussing as triggers. Performance, accuracy, and user experience were evaluated through experiments involving 50 participants aged 12 to 60. Usability was measured with the System Usability Scale (SUS) questionnaire along with additional feedback questions. Key findings indicate that the system achieved an average SUS score of 80.3, reflecting excellent usability on the adjective rating scale. Moreover, 94% of participants successfully controlled the elevator, performing tasks such as activating and deactivating brain control, calling the elevator, and selecting floors. The user experience questionnaires reveal that most participants found the system easy to use, well-integrated, and perceived the introduction of brain-controlled elevators to positively impact accessibility and inclusivity in buildings.},
year = {2024},
title = {Mindful Mobility: EEG-Based Brain-Computer Interaction for Elevator Control Using Muse Headset},
booktitle = {Proceedings of the International Conference on Ubiquitous Computing and Ambient Intelligence (UCAmI 2024). International Conference on Ubiquitous Computing and Ambient Intelligence (UCAmI-2024), November 27-30, Ulster University, Belfast, United Kingdom},
publisher = {Springer},
author = {Devansh Srivastav and Thomas Kaltbach and Ahmer Akhtar Mughal and Nischal Giriyan and Moaz Bin Younus and Tobias Jungbluth and Jochen Britz and Jan Alexandersson and Maurice Rekrut},
url = {https://www.dfki.de/fileadmin/user_upload/import/15474_Mindful_Mobility.pdf}
}
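A minimal sketch of the kind of artifact-based trigger the study uses as commands (blink, jaw clench), assuming a fixed peak-to-peak amplitude threshold over one-second EEG windows; the actual Muse pipeline and thresholds are not public in this entry.

# Threshold-based trigger detection on a mock EEG stream (illustrative).
import numpy as np

def detect_trigger(window, threshold_uv=150.0):
    """Fire when the peak-to-peak amplitude of an EEG window (microvolts)
    exceeds a threshold, as blink and jaw-clench artifacts typically do."""
    return np.ptp(window) > threshold_uv

stream = np.random.normal(0.0, 10.0, size=2560)   # mock 10 s at 256 Hz
stream[1280:1290] += 300.0                        # injected blink-like spike
windows = stream.reshape(-1, 256)                 # 1 s windows
print([i for i, w in enumerate(windows) if detect_trigger(w)])  # -> [5]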
H. M. D. Nguyen, N. Lukashina, T. Nguyen, A. T. Le, T. Nguyen, N. Ho, J. Peters, D. Sonntag, V. Zaverkin, and M. Niepert, "Structure-Aware E(3)-Invariant Molecular Conformer Aggregation Networks" in Proceedings of the 41st International Conference on Machine Learning. International Conference on Machine Learning (ICML), July 21-27, Austria, 2024.
@inproceedings{pub15617, abstract = {A molecule's 2D representation consists of its atoms, their attributes, and the molecule's covalent bonds. A 3D (geometric) representation of a molecule is called a conformer and consists of its atom types and Cartesian coordinates. Every conformer has a potential energy, and the lower this energy, the more likely it occurs in nature. Most existing machine learning methods for molecular property prediction consider either 2D molecular graphs or 3D conformer structure representations in isolation. Inspired by recent work on using ensembles of conformers in conjunction with 2D graph representations, we propose E(3)-invariant molecular conformer aggregation networks. The method integrates a molecule's 2D representation with that of multiple of its conformers. Contrary to prior work, we propose a novel 2D-3D aggregation mechanism based on a differentiable solver for the Fused Gromov-Wasserstein Barycenter problem and the use of an efficient conformer generation method based on distance geometry. We show that the proposed aggregation mechanism is E(3) invariant and propose an efficient GPU implementation. Moreover, we demonstrate that the aggregation mechanism helps to significantly outperform state-of-the-art molecule property prediction methods on established datasets.},
year = {2024},
title = {Structure-Aware E(3)-Invariant Molecular Conformer Aggregation Networks},
booktitle = {Proceedings of the 41st International Conference on Machine Learning. International Conference on Machine Learning (ICML), July 21-27, Austria},
publisher = {JMLR.org},
author = {Ho Minh Duy Nguyen and Nina Lukashina and Tai Nguyen and An T. Le and TrungTin Nguyen and Nhat Ho and Jan Peters and Daniel Sonntag and Viktor Zaverkin and Mathias Niepert},
url = {https://arxiv.org/pdf/2402.01975}
}
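The invariance argument can be illustrated compactly: pairwise interatomic distances are unchanged by rotations and translations, so any feature computed from them is E(3)-invariant. The sketch below aggregates conformers with a simple mean; the paper's actual aggregation uses a differentiable Fused Gromov-Wasserstein barycenter solver, which is not reproduced here.

# E(3)-invariant conformer descriptor via pairwise-distance histograms.
import numpy as np

def invariant_features(coords, bins=16, r_max=8.0):
    """Histogram of pairwise distances for one conformer (n_atoms, 3)."""
    diff = coords[:, None, :] - coords[None, :, :]
    dists = np.linalg.norm(diff, axis=-1)
    iu = np.triu_indices(len(coords), k=1)
    hist, _ = np.histogram(dists[iu], bins=bins, range=(0.0, r_max))
    return hist / max(hist.sum(), 1)

conformers = [np.random.randn(12, 3) for _ in range(5)]   # mock geometries
agg = np.mean([invariant_features(c) for c in conformers], axis=0)
print(agg.shape)   # (16,): aggregated, E(3)-invariant descriptor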
H. Tran, H. M. D. Nguyen, M. Nguyen, N. H. Le, and B. T. Nguyen, "Energy Minimizing-based Token Merging for Accelerating Transformers" in Proc. International Conference on Learning Representations (ICLR), Practical ML for Low Resource Settings Workshop. International Conference on Learning Representations (ICLR), May 11-12, 2024.
@inproceedings{pub15618, abstract = {Model compression has been an active research field to reduce the size and complexity of the model. In a recent noteworthy study, ToMe and its variants utilize the Bipartite Soft Matching (BSM) algorithm in which tokens representing patches in an image are split into two sets, and top k similar tokens from one set are merged. This approach not only utilizes pre-trained weights but also enhances speed and reduces memory usage. However, this algorithm has some drawbacks. The choice of a token-splitting strategy significantly influences the algorithm’s performance since tokens in one set can only perceive tokens in the other set, leading to mis-merging issues. Furthermore, although ToMe is effective in the initial layers, it becomes increasingly problematic in deeper layers as the number of tokens diminishes because of damaged informative tokens. To address these limitations, rather than relying on specific splitting strategies like BSM, we propose a new algorithm called PiToMe. Specifically, we prioritize the protection of informative tokens using an additional factor called the energy score. In experiments, PiToMe achieved up to a 50% memory reduction while exhibiting superior off-the-shelf performance on image classification (a 1.71% average performance drop compared to 2.6% for ToMe) and image-text retrieval (a 1.35% average performance drop compared to 6.89% for ToMe) compared to ToMe and ToMe-based approaches dependent solely on token similarity.},
year = {2024},
title = {Energy Minimizing-based Token Merging for Accelerating Transformers},
booktitle = {International Conference on Learning Representations (ICLR), Practical ML for Low Resource Settings Workshop. International Conference on Learning Representations (ICLR), May 11-12},
publisher = {JMLR.org},
author = {Hoai-Chau Tran and Ho Minh Duy Nguyen and Manh-Duy Nguyen and Ngan Hoang Le and Binh T. Nguyen},
url = {https://www.dfki.de/fileadmin/user_upload/import/15618_84_Energy_Minimizing_based_tok.pdf}
}
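A toy rendering of the energy-score idea: tokens with high mean similarity to other tokens sit in large clusters and are safe merge candidates, while isolated tokens are protected. The scoring and the single merge step below are simplifications of the PiToMe algorithm, not the paper's implementation.

# Energy-score-guided token merging (illustrative, NumPy only).
import numpy as np

def energy_scores(tokens):
    """Mean cosine similarity of each token to all others as its 'energy'."""
    normed = tokens / np.linalg.norm(tokens, axis=1, keepdims=True)
    sim = normed @ normed.T
    np.fill_diagonal(sim, 0.0)
    return sim.mean(axis=1)

def merge_highest_energy_pair(tokens):
    """Average the two most similar tokens among the high-energy half."""
    e = energy_scores(tokens)
    candidates = np.argsort(-e)[: max(2, len(tokens) // 2)]
    sub = tokens[candidates]
    normed = sub / np.linalg.norm(sub, axis=1, keepdims=True)
    sim = normed @ normed.T
    np.fill_diagonal(sim, -np.inf)
    i, j = np.unravel_index(np.argmax(sim), sim.shape)
    a, b = candidates[i], candidates[j]
    merged = (tokens[a] + tokens[b]) / 2.0
    keep = [k for k in range(len(tokens)) if k not in (a, b)]
    return np.vstack([tokens[keep], merged])

tokens = np.random.randn(10, 8)
print(merge_highest_energy_pair(tokens).shape)   # (9, 8): one pair merged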
H. Tran, H. M. D. Nguyen, D. M. Nguyen, T. Nguyen, N. Le, P. Xie, D. Sonntag, J. Zou, B. T. Nguyen, and M. Niepert, "Accelerating Transformers with Spectrum-Preserving Token Merging" in Proc. The Thirty-Eighth Annual Conference on Neural Information Processing Systems. Neural Information Processing Systems (NeurIPS-2024), December 10-15, Canada, 2024.
@inproceedings{pub15619, abstract = {Increasing the throughput of the Transformer architecture, a foundational component used in numerous state-of-the-art models for vision and language tasks (e.g., GPT, LLaVa), is an important problem in machine learning. One recent and effective strategy is to merge token representations within Transformer models, aiming to reduce computational and memory requirements while maintaining accuracy. Prior works have proposed algorithms based on Bipartite Soft Matching (BSM), which divides tokens into distinct sets and merges the top k similar tokens. However, these methods have significant drawbacks, such as sensitivity to token splitting strategies and damage to informative tokens in later layers. This paper presents a novel paradigm called PITOME, which prioritizes the preservation of informative tokens using an additional metric termed the energy score. This score identifies large clusters of similar tokens as high-energy, indicating potential candidates for merging, while smaller (unique and isolated) clusters are considered as low-energy and preserved. Experimental findings demonstrate that PITOME saved from 40-60% FLOPs of the base models while exhibiting superior off-the-shelf performance on image classification (0.5% average performance drop of ViT-MAEH compared to 2.6% as baselines), image-text retrieval (0.3% average performance drop of CLIP on Flickr30k compared to 4.5% as others), and analogously in visual questions answering with LLaVa-7B. Furthermore, PITOME is theoretically shown to preserve intrinsic spectral properties to the original token space under mild conditions.},
year = {2024},
title = {Accelerating Transformers with Spectrum-Preserving Token Merging},
booktitle = {The Thirty-Eighth Annual Conference on Neural Information Processing Systems. Neural Information Processing Systems (NeurIPS-2024), December 10-15, Canada},
publisher = {JMLR.org},
author = {Hoai-Chau Tran and Ho Minh Duy Nguyen and Duy M. Nguyen and TrungTin Nguyen and Ngan Le and Pengtao Xie and Daniel Sonntag and James Zou and Binh T. Nguyen and Mathias Niepert},
url = {https://arxiv.org/pdf/2405.16148}
}
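For contrast with the energy-based approach, here is a toy version of the Bipartite Soft Matching baseline both token-merging papers start from: split tokens into two sets and merge the top-k most similar cross-set pairs. The alternating split and averaging merge are illustrative simplifications.

# Bipartite Soft Matching (BSM) baseline sketch (illustrative, NumPy only).
import numpy as np

def bsm_merge(tokens, k=2):
    a, b = tokens[0::2], tokens[1::2]              # alternating split
    na = a / np.linalg.norm(a, axis=1, keepdims=True)
    nb = b / np.linalg.norm(b, axis=1, keepdims=True)
    sim = na @ nb.T                                # cross-set similarities
    best_b = sim.argmax(axis=1)                    # best partner per a-token
    order = np.argsort(-sim.max(axis=1))[:k]       # top-k a-tokens to merge
    merged = [(a[i] + b[best_b[i]]) / 2.0 for i in order]
    keep_a = [a[i] for i in range(len(a)) if i not in set(order)]
    keep_b = [b[j] for j in range(len(b)) if j not in {best_b[i] for i in order}]
    return np.vstack(keep_a + keep_b + merged)

tokens = np.random.randn(12, 8)
# 12 tokens reduced by up to k (less if two a-tokens pick the same partner)
print(bsm_merge(tokens, k=2).shape)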
M. A. Kadir, G. Addluri, and D. Sonntag, "Harmonizing Feature Attributions Across Deep Learning Architectures: Enhancing Interpretability and Consistency" in KI 2023: Advances in Artificial Intelligence, Springer, Cham, 2023.
@inbook{pub14630, series = {Lecture Notes in Computer Science},
abstract = {Enhancing the interpretability and consistency of machine learning models is critical to their deployment in real-world applications. Feature attribution methods have gained significant attention, which provide local explanations of model predictions by attributing importance to individual input features. This study examines the generalization of feature attributions across various deep learning architectures, such as convolutional neural networks (CNNs) and vision transformers. We aim to assess the feasibility of utilizing a feature attribution method as a future detector and examine how these features can be harmonized across multiple models employing distinct architectures but trained on the same data distribution. By exploring this harmonization, we aim to develop a more coherent and optimistic understanding of feature attributions, enhancing the consistency of local explanations across diverse deep-learning models. Our findings highlight the potential for harmonized feature attribution methods to improve interpretability and foster trust in machine learning applications, regardless of the underlying architecture.},
year = {2023},
title = {Harmonizing Feature Attributions Across Deep Learning Architectures: Enhancing Interpretability and Consistency},
editor = {Dietmar Seipel and Alexander Steen},
booktitle = {KI 2023: Advances in Artificial Intelligence. German Conference on Artificial Intelligence (KI-2023)},
pages = {90-97},
isbn = {978-3-031-42607-0},
publisher = {Springer, Cham},
author = {Md Abdul Kadir and Gowthamkrishna Addluri and Daniel Sonntag},
url = {https://rdcu.be/dvrE0 https://www.dfki.de/fileadmin/user_upload/import/14630_Harmonizing_Feature_Attributions_Across_Deep_Learning_Architectures__Enhancing_Interpretability_and_Consistency_KI2023.pdf}
}
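The consistency question the chapter studies can be made concrete with a small check: compute input-gradient saliency for the same input under two different architectures and measure their agreement. The toy models, the plain-gradient saliency, and the cosine measure below are illustrative stand-ins, not the chapter's harmonization method.

# Cross-architecture attribution agreement check (PyTorch sketch).
import torch, torch.nn as nn

def saliency(model, x):
    """Absolute input gradient of the top predicted logit."""
    x = x.clone().requires_grad_(True)
    score = model(x).max()
    score.backward()
    return x.grad.abs()

cnn = nn.Sequential(nn.Conv1d(1, 4, 3, padding=1), nn.Flatten(), nn.Linear(4 * 32, 5))
mlp = nn.Sequential(nn.Flatten(), nn.Linear(32, 64), nn.ReLU(), nn.Linear(64, 5))
x = torch.randn(1, 1, 32)
s1, s2 = saliency(cnn, x).flatten(), saliency(mlp, x).flatten()
cos = torch.nn.functional.cosine_similarity(s1, s2, dim=0)
print(f"attribution agreement (cosine): {cos.item():.3f}")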
M. A. Kadir, H. M. T. Alam, and D. Sonntag, "EdgeAL: An Edge Estimation Based Active Learning Approach for OCT Segmentation" in Proc. International Conference on Medical Image Computing and Computer-Assisted Intervention. Medical Image Computing and Computer Assisted Intervention (MICCAI-2023), 2023.
@inproceedings{pub14635, abstract = {Active learning algorithms have become increasingly popular for training models with limited data. However, selecting data for annotation remains a challenging problem due to the limited information available on unseen data. To address this issue, we propose EdgeAL, which utilizes the edge information of unseen images as a priori information for measuring uncertainty. The uncertainty is quantified by analyzing the divergence and entropy in model predictions across edges. This measure is then used to select superpixels for annotation. We demonstrate the effectiveness of EdgeAL on multi-class Optical Coherence Tomography (OCT) segmentation tasks, where we achieved a 99% dice score while reducing the annotation label cost to 12%, 2.3%, and 3%, respectively, on three publicly available datasets (Duke, AROI, and UMN). The source code is available at https://github.com/Mak-Ta-Reque/EdgeAL.},
year = {2023},
title = {EdgeAL: An Edge Estimation Based Active Learning Approach for OCT Segmentation},
booktitle = {International Conference on Medical Image Computing and Computer-Assisted Intervention. Medical Image Computing and Computer Assisted Intervention (MICCAI-2023)},
pages = {79-89},
publisher = {Springer, Cham},
author = {Md Abdul Kadir and Hasan Md Tusfiqur Alam and Daniel Sonntag},
organization = {Springer},
url = {https://rdcu.be/dvx8f https://www.dfki.de/fileadmin/user_upload/import/14635_paper1593.pdf}
}
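A reduced sketch of the acquisition score: EdgeAL quantifies uncertainty along image edges. Below, a gradient-magnitude edge mask and predictive entropy stand in for the paper's divergence-based measure, and superpixel-level selection is omitted.

# Edge-focused uncertainty score for ranking unlabelled images (illustrative).
import numpy as np

def edge_entropy_score(image, probs, edge_q=0.9, eps=1e-12):
    """Mean predictive entropy over high-gradient pixels of one image.
    image: (H, W) grayscale; probs: (H, W, n_classes) softmax output."""
    gy, gx = np.gradient(image.astype(float))
    mag = np.hypot(gx, gy)
    edges = mag >= np.quantile(mag, edge_q)        # top 10% gradient pixels
    p = np.clip(probs, eps, 1.0)
    entropy = -(p * np.log(p)).sum(axis=-1)
    return float(entropy[edges].mean())

img = np.random.rand(64, 64)
probs = np.random.dirichlet(np.ones(4), size=(64, 64))
print(edge_entropy_score(img, probs))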
H. Kath, B. Lüers, T. S. Gouvêa, and D. Sonntag, "Lost in Dialogue: A Review and Categorisation of Current Dialogue System Approaches and Technical Solutions" in Proc. KI 2023: Advances in Artificial Intelligence. German Conference on Artificial Intelligence (KI-2023), 46th German Conference on AI, Berlin, Germany, September 26–29, 2023, Proceedings, located at 46th German Conference on AI, September 26-29, Berlin, Germany, 2023.
doi: 10.1007/978-3-031-42608-7_9
@inproceedings{pub14648, series = {Lecture Notes in Artificial Intelligence, LNAI},
abstract = {Dialogue systems are an important and very active research area with many practical applications. However, researchers and practitioners new to the field may have difficulty with the categorisation, number and terminology of existing free and commercial systems. Our paper aims to achieve two main objectives. Firstly, based on our structured literature review, we provide a categorisation of dialogue systems according to the objective, modality, domain, architecture, and model, and provide information on the correlations among these categories. Secondly, we summarise and compare frameworks and applications of intelligent virtual assistants, commercial frameworks, research dialogue systems, and large language models according to these categories and provide system recommendations for researchers new to the field.},
month = {9},
year = {2023},
title = {Lost in Dialogue: A Review and Categorisation of Current Dialogue System Approaches and Technical Solutions},
booktitle = {KI 2023: Advances in Artificial Intelligence. German Conference on Artificial Intelligence (KI-2023), 46th German Conference on AI, Berlin, Germany, September 26–29, 2023, Proceedings, located at 46th German Conference on AI, September 26-29, Berlin, Germany},
editor = {Dietmar Seipel and Alexander Steen},
volume = {14236},
pages = {98-113},
isbn = {978-3-031-42607-0},
publisher = {Springer, Heidelberg},
doi = {https://doi.org/10.1007/978-3-031-42608-7_9},
author = {Hannes Kath and Bengt Lüers and Thiago S. Gouvêa and Daniel Sonntag},
keywords = {Dialogue System, Conversational AI, Task-oriented, Natural Language Processing, Survey},
url = {https://link.springer.com/chapter/10.1007/978-3-031-42608-7_9 https://www.dfki.de/fileadmin/user_upload/import/14648_978-3-031-42608-7-seiten-2.pdf}
}
M. Nayebi, K. Kuznetsov, A. Zeller, and G. Ruhe, "User Driven Functionality Deletion for Mobile Apps" in Proc. IEEE International Requirements Engineering Conference. IEEE International Requirements Engineering Conference (RE-2023), September 4-8, Hannover, Germany, 2023.
@inproceedings{pub14666, abstract = {Evolving software with an increasing number of features is harder to understand and thus harder to use. Software release planning has been concerned with planning these additions. Moreover, software of increasing size takes more effort to be maintained. In the domain of mobile apps, too much functionality can easily impact usability, maintainability, and resource consumption. Hence, it is important to understand the extent to which the law of continuous growth applies to mobile apps. Previous work showed that the deletion of functionality is common and sometimes driven by user reviews. However, it is unknown whether these deletions are visible or important to the app users. In this study, we surveyed 297 mobile app users to understand the significance of functionality deletion for them. Our results showed that for most users, the deletion of features corresponds with negative sentiments and change in usage and even churn. Motivated by these preliminary results, we propose Radiation to input user reviews and recommend if any functionality should be deleted from an app's User Interface (UI). We evaluate Radiation using historical data and surveying developers' opinions. From the analysis of 190,062 reviews from 115 randomly selected apps, we show that Radiation can recommend functionality deletion with an average F-Score of 74% and if sufficiently many negative user reviews suggest so.},
year = {2023},
title = {User Driven Functionality Deletion for Mobile Apps},
booktitle = {IEEE International Requirements Engineering Conference. IEEE International Requirements Engineering Conference (RE-2023), September 4-8, Hannover, Germany},
isbn = {979-8-3503-2689-5},
publisher = {IEEE},
author = {Maleknaz Nayebi and Konstantin Kuznetsov and Andreas Zeller and Guenther Ruhe},
keywords = {Mobile apps, Survey, App store mining, Software Release planning, Empirical software engineering},
url = {https://ieeexplore.ieee.org/document/10260783/}
}
L. Kopácsi, B. Baffy, G. Baranyi, J. Skaf, G. Sörös, S. Szeier, A. Lőrincz, and D. Sonntag, "Cross-Viewpoint Semantic Mapping: Integrating Human and Robot Perspectives for Improved 3D Semantic Reconstruction" Sensors - Open Access Journal (Sensors), vol. 23, iss. 11.
2023.
doi: 10.3390/s23115126
@article{pub14703, abstract = {Allocentric semantic 3D maps are highly useful for a variety of human–machine interaction related tasks since egocentric viewpoints can be derived by the machine for the human partner. Class labels and map interpretations, however, may differ or could be missing for the participants due to the different perspectives. Particularly, when considering the viewpoint of a small robot, which significantly differs from the viewpoint of a human. In order to overcome this issue, and to establish common ground, we extend an existing real-time 3D semantic reconstruction pipeline with semantic matching across human and robot viewpoints. We use deep recognition networks, which usually perform well from higher (i.e., human) viewpoints but are inferior from lower viewpoints, such as that of a small robot. We propose several approaches for acquiring semantic labels for images taken from unusual perspectives. We start with a partial 3D semantic reconstruction from the human perspective that we transfer and adapt to the small robot’s perspective using superpixel segmentation and the geometry of the surroundings. The quality of the reconstruction is evaluated in the Habitat simulator and a real environment using a robot car with an RGBD camera. We show that the proposed approach provides high-quality semantic segmentation from the robot’s perspective, with accuracy comparable to the original one. In addition, we exploit the gained information and improve the recognition performance of the deep network for the lower viewpoints and show that the small robot alone is capable of generating high-quality semantic maps for the human partner. The computations are close to real-time, so the approach enables interactive applications.},
number = {11},
month = {5},
year = {2023},
title = {Cross-Viewpoint Semantic Mapping: Integrating Human and Robot Perspectives for Improved 3D Semantic Reconstruction},
journal = {Sensors - Open Access Journal (Sensors)},
volume = {23},
pages = {1-17},
publisher = {MDPI},
doi = {https://doi.org/10.3390/s23115126},
author = {László Kopácsi and Benjámin Baffy and Gábor Baranyi and Joul Skaf and Gábor Sörös and Szilvia Szeier and András Lőrincz and Daniel Sonntag},
keywords = {3D semantic maps; semantic matching; superpixel segmentation; semantic segmentation; human–robot collaboration; real-time reconstruction; label transfer; computer vision; deep learning},
url = {https://doi.org/10.3390/s23115126 https://www.dfki.de/fileadmin/user_upload/import/14703_sensors-23-05126.pdf}
}
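The label-transfer step can be sketched as projecting a labelled 3D point cloud into a camera at the robot's viewpoint; the pinhole intrinsics, image size, and absence of occlusion handling below are simplifying assumptions (the paper additionally refines the transferred labels with superpixel segmentation).

# Project labelled 3D map points into a new viewpoint (illustrative).
import numpy as np

def render_labels(points, labels, R, t, K, hw=(48, 64)):
    """points: (N, 3) world coords; labels: (N,) ints; R, t: world->camera."""
    cam = points @ R.T + t
    z = cam[:, 2]
    valid = z > 0.1                                  # in front of the camera
    uv = cam[valid] @ K.T
    uv = (uv[:, :2] / uv[:, 2:3]).astype(int)
    out = np.full(hw, -1)                            # -1 = no label
    h, w = hw
    inside = (uv[:, 0] >= 0) & (uv[:, 0] < w) & (uv[:, 1] >= 0) & (uv[:, 1] < h)
    out[uv[inside, 1], uv[inside, 0]] = labels[valid][inside]  # no z-buffer
    return out

K = np.array([[60.0, 0, 32], [0, 60.0, 24], [0, 0, 1]])
pts = np.random.uniform(-1, 1, (500, 3)) + [0, 0, 3]   # points ~3 m ahead
lbl = np.random.randint(0, 5, 500)
print(render_labels(pts, lbl, np.eye(3), np.zeros(3), K).shape)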
E. Bunde, D. Eisenhardt, D. Sonntag, H. Profitlich, and C. Meske, "Giving DIAnA More TIME – Guidance for the Design of XAI-Based Medical Decision Support Systems" in Proc. 18th International Conference on Design Science Research in Information Systems and Technology, DESRIST 2023. International Conference on Design Science Research in Information Systems and Technology (DESRIST-2023), May 31 - June 3, Pretoria, South Africa, 2023.
@inproceedings{pub14706, abstract = {Future healthcare ecosystems integrating human-centered artificial intelligence (AI) will be indispensable. AI-based healthcare technologies can support diagnosis processes and make healthcare more accessible globally. In this context, we conducted a design science research project intending to introduce design principles for user interfaces (UIs) of explainable AI-based (XAI) medical decision support systems (XAI-based MDSS). We used an archaeological approach to analyze the UI of an existing web-based system in the context of skin lesion classification called DIAnA (Dermatological Images – Analysis and Archiving). One of DIAnA’s unique characteristics is that it should be usable for the stakeholder groups of physicians and patients. We conducted the in-situ analysis with these stakeholders using the think-aloud method and semi-structured interviews. We anchored our interview guide in concepts of the Theory of Interactive Media Effects (TIME), which formulates UI features as causes and user psychology as effects. Based on the results, we derived 20 design requirements and developed nine design principles grounded in TIME for this class of XAI-based MDSS, either associated with the needs of physicians, patients, or both. Regarding evaluation, we first conducted semi-structured interviews with software developers to assess the reusability of our design principles. Afterward, we conducted a survey with user experience/interface designers. The evaluation uncovered that 77% of the participants would adopt the design principles, and 82% would recommend them to colleagues for a suitable project. The findings prove the reusability of the design principles and highlight a positive perception by potential implementers.},
month = {5},
year = {2023},
title = {Giving DIAnA More TIME – Guidance for the Design of XAI-Based Medical Decision Support Systems},
booktitle = {18th International Conference on Design Science Research in Information Systems and Technology, DESRIST 2023. International Conference on Design Science Research in Information Systems and Technology (DESRIST-2023), May 31 - June 3, Pretoria, South Africa},
publisher = {Springer Nature Switzerland},
author = {Enrico Bunde and Daniel Eisenhardt and Daniel Sonntag and Hans-Jürgen Profitlich and Christian Meske},
url = {https://scholar.google.de/citations?view_op=view_citation&hl=en&user=v7i6Uz4AAAAJ&sortby=pubdate&citation_for_view=v7i6Uz4AAAAJ:anf4URPfarAC https://www.dfki.de/fileadmin/user_upload/import/14706_"Giving_DIAnA_More_TIME__8211}
}
D. Sonntag, "Avoid Predatory Journals" KI - Künstliche Intelligenz, German Journal on Artificial Intelligence - Organ des Fachbereiches "Künstliche Intelligenz" der Gesellschaft für Informatik e.V. (KI), vol. 37.
2023.
@article{pub14707, abstract = {n/a},
month = {5},
year = {2023},
title = {Avoid Predatory Journals},
journal = {KI - Künstliche Intelligenz, German Journal on Artificial Intelligence - Organ des Fachbereiches "Künstliche Intelligenz" der Gesellschaft für Informatik e.V. (KI)},
volume = {37},
pages = {1-3},
publisher = {Springer, Berlin Heidelberg},
author = {Daniel Sonntag},
url = {https://link.springer.com/article/10.1007/s13218-023-00805-w https://www.dfki.de/fileadmin/user_upload/import/14707_Avoid_Predatory_Journals.pdf}
}
M. A. Kadir, A. Mosavi, and D. Sonntag, "Evaluation Metrics for XAI: A Review, Taxonomy, and Practical Applications" in Proc. 2023 IEEE 27th International Conference on Intelligent Engineering Systems (INES). Conference on Intelligent Engineering Systems (INES-2023), July 26-28, Nairobi, Kenya, 2023.
doi: 10.1109/INES59282.2023.10297629
@inproceedings{pub14708, abstract = {Within the past few years, the accuracy of deep learning and machine learning models has been improving significantly while less attention has been paid to their responsibility, explainability, and interpretability. eXplainable Artificial Intelligence (XAI) methods, guidelines, concepts, and strategies offer the possibility of models' evaluation for improving fidelity, faithfulness, and overall explainability. Due to the diversity of data and learning methodologies, there needs to be a clear definition for the validity, reliability, and evaluation metrics of explainability. This article reviews evaluation metrics used for XAI through the PRISMA systematic guideline for a comprehensive and systematic literature review. Based on the results, this study suggests two taxonomies for the evaluation metrics: one based on the applications, and one based on the evaluation metrics themselves.},
year = {2023},
title = {Evaluation Metrics for XAI: A Review, Taxonomy, and Practical Applications},
booktitle = {2023 IEEE 27th International Conference on Intelligent Engineering Systems (INES). Conference on Intelligent Engineering Systems (INES-2023), July 26-28, Nairobi, Kenya},
pages = {000111-000124},
publisher = {IEEE},
doi = {https://doi.org/10.1109/INES59282.2023.10297629},
author = {Md Abdul Kadir and Amir Mosavi and Daniel Sonntag},
keywords = {Measurement; Deep learning; Systematics; Bibliographies; Taxonomy; Psychology; Reliability; XAI; machine learning; deep learning; explainable artificial intelligence; explainable AI; explainable machine learning; metrics; evaluation},
url = {https://ieeexplore.ieee.org/abstract/document/10297629 https://www.dfki.de/fileadmin/user_upload/import/14708_XAI_Evaluation_Metrics__Taxonomies__Concepts_and_Applications__INES_2023_-7.pdf}
}
M. A. Kadir, F. Nunnari, and D. Sonntag, Fine-tuning of explainable CNNs for skin lesion classification based on dermatologists' feedback towards increasing trust, preprint, 2023.
@misc{pub14709, abstract = {In this paper, we propose a CNN fine-tuning method which enables users to give simultaneous feedback on two outputs: the classification itself and the visual explanation for the classification. We present the effect of this feedback strategy in a skin lesion classification task and measure how CNNs react to the two types of user feedback. To implement this approach, we propose a novel CNN architecture that integrates the Grad-CAM technique for explaining the model's decision in the training loop. Using simulated user feedback, we found that fine-tuning our model on both classification and explanation improves visual explanation while preserving classification accuracy, thus potentially increasing the trust of users in using CNN-based skin lesion classifiers.},
year = {2023},
title = {Fine-tuning of explainable CNNs for skin lesion classification based on dermatologists' feedback towards increasing trust},
author = {Md Abdul Kadir and Fabrizio Nunnari and Daniel Sonntag},
status_notes = {preprint},
url = {https://arxiv.org/abs/2304.01399 https://www.dfki.de/fileadmin/user_upload/import/14709_2304.01399.pdf}
}
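The dual-feedback objective described in the abstract can be sketched as a classification loss plus a penalty between the model's Grad-CAM map and a user-provided target mask. The toy network, loss weighting, and mask below are assumptions; the paper's architecture integrates Grad-CAM into the training loop differently.

# Joint classification + explanation fine-tuning loss (PyTorch sketch).
import torch, torch.nn as nn, torch.nn.functional as F

features = nn.Conv2d(1, 8, 3, padding=1)
head = nn.Linear(8, 2)

def forward_with_cam(x, target_class):
    fmap = F.relu(features(x))                     # (B, 8, H, W)
    logits = head(fmap.mean(dim=(2, 3)))           # global average pooling
    # Grad-CAM: weight channels by the gradient of the target logit
    grads = torch.autograd.grad(logits[:, target_class].sum(), fmap,
                                create_graph=True)[0]
    weights = grads.mean(dim=(2, 3), keepdim=True)
    cam = F.relu((weights * fmap).sum(dim=1))      # (B, H, W)
    return logits, cam / (cam.amax(dim=(1, 2), keepdim=True) + 1e-8)

x = torch.randn(4, 1, 16, 16)
y = torch.randint(0, 2, (4,))
mask = torch.zeros(4, 16, 16); mask[:, 4:12, 4:12] = 1.0   # simulated feedback
logits, cam = forward_with_cam(x, target_class=1)
loss = F.cross_entropy(logits, y) + 0.5 * F.mse_loss(cam, mask)
loss.backward()                                    # trains on both signals
print(float(loss))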
H. Kath, B. Lüers, T. Gouvea, and D. Sonntag, "A Virtual Reality Tool for Representing, Visualizing and Updating Deep Learning Models" DFKI, 2023.
@techreport{pub14710, series = {DFKI Research Reports, RR},
abstract = {Deep learning is ubiquitous, but its lack of transparency limits its impact on several potential application areas. We demonstrate a virtual reality tool for automating the process of assigning data inputs to different categories. A dataset is represented as a cloud of points in virtual space. The user explores the cloud through movement and uses hand gestures to categorise portions of the cloud. This triggers gradual movements in the cloud: points of the same category are attracted to each other, different groups are pushed apart, while points are globally distributed in a way that utilises the entire space. The space, time, and forces observed in virtual reality can be mapped to well-defined machine learning concepts, namely the latent space, the training epochs and the backpropagation. Our tool illustrates how the inner workings of deep neural networks can be made tangible and transparent. We expect this approach to accelerate the autonomous development of deep learning applications by end users in novel areas.},
month = {5},
year = {2023},
title = {A Virtual Reality Tool for Representing, Visualizing and Updating Deep Learning Models},
volume = {2305.15353v1},
pages = {8},
institution = {DFKI},
author = {Hannes Kath and Bengt Lüers and Thiago Gouvea and Daniel Sonntag},
keywords = {Virtual Reality · Annotation Tool · Latent Space · Representation Learning},
url = {https://doi.org/10.48550/arXiv.2305.15353 https://www.dfki.de/fileadmin/user_upload/import/14710_2305.15353.pdf}
}
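The attraction/repulsion dynamics the tool maps to training can be sketched as a force-directed layout update: points of the same category attract, points of different categories repel, and a spreading term fills the space. All coefficients below are illustrative, not the report's parameters.

# One step of the point-cloud dynamics (illustrative, NumPy only).
import numpy as np

def step(points, labels, attract=0.05, repel=0.02, spread=0.01):
    """One update of a force-directed layout; points: (N, 3), labels: (N,)."""
    delta = points[None, :, :] - points[:, None, :]          # pairwise offsets
    dist = np.linalg.norm(delta, axis=-1, keepdims=True) + 1e-6
    same = (labels[:, None] == labels[None, :])[..., None]
    force = np.where(same, attract, -repel) * delta / dist   # unit directions
    points = points + force.mean(axis=1)
    return points + spread * (points - points.mean(axis=0))  # fill the space

pts = np.random.randn(50, 3)
lbl = np.random.randint(0, 3, 50)
for _ in range(100):
    pts = step(pts, lbl)
print(np.round(pts[:3], 2))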
H. Kath, T. Gouvea, and D. Sonntag, "A Deep Generative Model for Interactive Data Annotation through Direct Manipulation in Latent Space" DFKI, 2023.
@techreport{pub14711, series = {DFKI Research Reports, RR},
abstract = {The impact of machine learning (ML) in many fields of application is constrained by lack of annotated data. Among existing tools for ML-assisted data annotation, one little explored tool type relies on an analogy between the coordinates of a graphical user interface and the latent space of a neural network for interaction through direct manipulation. In the present work, we 1) expand the paradigm by proposing two new analogies: time and force as reflecting iterations and gradients of network training; 2) propose a network model for learning a compact graphical representation of the data that takes into account both its internal structure and user provided annotations; and 3) investigate the impact of model hyperparameters on the learned graphical representations of the data, identifying candidate model variants for a future user study.},
month = {5},
year = {2023},
title = {A Deep Generative Model for Interactive Data Annotation through Direct Manipulation in Latent Space},
volume = {2305.15337v1},
pages = {7},
institution = {DFKI},
author = {Hannes Kath and Thiago Gouvea and Daniel Sonntag},
keywords = {Deep Generative Model · Self-supervised Learning · Variational Autoencoder},
url = {https://doi.org/10.48550/arXiv.2305.15337 https://www.dfki.de/fileadmin/user_upload/import/14711_2305.15337.pdf}
}
H. Kath, S. Stone, S. Rapp, and P. Birkholz, "Carina – A Corpus of Aligned German Read Speech Including Annotations" in Proc. ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). International Conference on Acoustics, Speech and Signal Processing (ICASSP-2022), May 22-27, Singapore, Singapore, 2022.
doi: 10.1109/ICASSP43922.2022.9746160
@inproceedings{pub14712, abstract = {This paper presents the semi-automatically created Corpus of Aligned Read Speech Including Annotations (CARInA), a speech corpus based on the German Spoken Wikipedia Corpus (GSWC). CARInA tokenizes, consolidates and organizes the vast, but rather unstructured material contained in GSWC. The contents are grouped by annotation completeness, and extended by canonic, morphosyntactic and prosodic annotations. The annotations are provided in BPF and TextGrid format. It contains 194 hours of speech material from 327 speakers, of which 124 hours are fully phonetically aligned and 30 hours are fully aligned at all annotation levels. CARInA is freely available, designed to grow and improve over time, and suitable for large-scale speech analyses or machine learning tasks as illustrated by two examples shown in this paper.},
month = {5},
year = {2022},
title = {Carina – A Corpus of Aligned German Read Speech Including Annotations},
booktitle = {ICASSP 2022 - 2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP). International Conference on Acoustics, Speech and Signal Processing (ICASSP-2022), May 22-27, Singapore, Singapore},
pages = {6157-6161},
publisher = {Institute of Electrical and Electronics Engineers (IEEE)},
doi = {https://doi.org/10.1109/ICASSP43922.2022.9746160},
author = {Hannes Kath and Simon Stone and Stefan Rapp and Peter Birkholz},
keywords = {CARInA, speech data, prosodic annotation},
url = {https://doi.org/10.1109/ICASSP43922.2022.9746160 https://www.dfki.de/fileadmin/user_upload/import/14712_Carina__A_Corpus_of_Aligned_German_Read_Speech_Including_Annotations.pdf}
}
M. A. Kadir, M. B. Chowdhury, J. A. Rashid, S. R. Shakil, and M. K. Rhaman, "An autonomous industrial robot for loading and unloading goods" in Proc. 2015 International Conference on Informatics, Electronics & Vision (ICIEV). International Conference on Informatics, Electronics & Vision (ICIEV-2015), 4th International Conference, June 15-18, Fukuoka, Japan, 2015.
@inproceedings{pub14629, abstract = {In industries loading and unloading of heavy loads manually is one of the most important task which turns out to be quite difficult, time-consuming and risky for humans. This paper illustrates the mechanical design of the industry based automated robot which include: Ackerman Steering Mechanism and Differential Mechanism. Ackerman Steering allows front two wheels to turn left and right in the track without going out of the track. Differential has been mounted with two back wheels and a DC motor has been used with its controller to start motion of the robot. The autonomous robot is designed to start its movement from a starting position where goods are loaded on it, then follow a path of white line drawn on black surface and unload goods by itself after reaching a destination place. Digital Line Following sensor has been mounted in front of the robot so that the sensor can detect path by emitting and receiving signals allowing it to move in the pre-defined track having left and right turns while carrying goods from starting position to the destination. The main objective is to load and unload heavy goods that has been achieved by two large linear actuators for producing required torque and force necessary to unload heavy loads up to 150kg sideways to the ground safely. Besides, the robot has been built up having the ability to avoid collision with any obstacles that come in its way. Building an industrial robot with moderate speed, good efficiency for loading and unloading purpose within a short time to ease human suffering has been the main focus of this paper.},
year = {2015},
title = {An autonomous industrial robot for loading and unloading goods},
booktitle = {2015 International Conference on Informatics, Electronics & Vision (ICIEV). International Conference on Informatics, Electronics & Vision (ICIEV-2015), 4th International Conference, June 15-18, Fukuoka, Japan},
pages = {1-6},
publisher = {IEEE},
author = {Md Abdul Kadir and Md Belayet Chowdhury and Jaber AL Rashid and Shifur Rahman Shakil and Md Khalilur Rhaman},
organization = {IEEE},
url = {https://www.dfki.de/fileadmin/user_upload/import/14629_An_Autonomous_Industrial_Robot_for_Loading_and.pdf https://dl.acm.org/doi/10.1109/ICIEV.2015.7333984}
}
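The line-following behaviour described in the abstract reads naturally as a bang-bang control loop over the digital line sensor; the sketch below uses hypothetical sensor and steering interfaces (the original system is embedded hardware, not Python).

# Bang-bang line follower over a 5-element line sensor (illustrative).
def steer(sensor_bits):
    """sensor_bits: tuple of 5 booleans, True where the white line is seen.
    Returns a steering command for the Ackermann front axle."""
    left = sum(sensor_bits[:2])
    right = sum(sensor_bits[3:])
    if left > right:
        return "turn_left"
    if right > left:
        return "turn_right"
    return "straight"

# Usage: in the control loop, read the line sensor and actuate the steering.
for reading in [(0, 0, 1, 0, 0), (0, 1, 1, 0, 0), (0, 0, 1, 1, 0)]:
    print(steer(tuple(bool(b) for b in reading)))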
H. M. D. Nguyen, H. Nguyen, M. T. N. Truong, T. Cao, B. T. Nguyen, N. Ho, P. Swoboda, S. Albarqouni, P. Xie, and D. Sonntag, "Joint Self-Supervised Image-Volume Representation Learning with Intra-Inter Contrastive Clustering" in Proceedings of the AAAI Conference on Artificial Intelligence (AAAI-2023). AAAI Conference on Artificial Intelligence (AAAI), February 7-14, Washington, DC, USA, 2023.
@inproceedings{pub12923, abstract = {Collecting large-scale medical datasets with fully annotated samples for training of deep networks is prohibitively expensive, especially for 3D volume data. Recent breakthroughs in self-supervised learning (SSL) offer the ability to overcome the lack of labeled training samples by learning feature representations from unlabeled data. However, most current SSL techniques in the medical field have been designed for either 2D images or 3D volumes. In practice, this restricts the capability to fully leverage unlabeled data from numerous sources, which may include both 2D and 3D data. Additionally, the use of these pre-trained networks is constrained to downstream tasks with compatible data dimensions. In this paper, we propose a novel framework for unsupervised joint learning on 2D and 3D data modalities. Given a set of 2D images or 2D slices extracted from 3D volumes, we construct an SSL task based on a 2D contrastive clustering problem for distinct classes. The 3D volumes are exploited by computing vectored embedding at each slice and then assembling a holistic feature through deformable self-attention mechanisms in Transformer, allowing incorporating long-range dependencies between slices inside 3D volumes. These holistic features are further utilized to define a novel 3D clustering agreement-based SSL task and masking embedding prediction inspired by pre-trained language models. Experiments on downstream tasks, such as 3D brain segmentation, lung nodule detection, 3D heart structures segmentation, and abnormal chest X-ray detection, demonstrate the effectiveness of our joint 2D and 3D SSL approach. We improve plain 2D Deep-ClusterV2 and SwAV by a significant margin and also surpass various modern 2D and 3D SSL approaches.},
month = {2},
year = {2023},
title = {Joint Self-Supervised Image-Volume Representation Learning with Intra-Inter Contrastive Clustering},
booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence (AAAI-2023). AAAI Conference on Artificial Intelligence (AAAI), February 7-14, Washington, DC, USA},
publisher = {AAAI Press},
author = {Ho Minh Duy Nguyen and Hoang Nguyen and Mai T. N. Truong and Tri Cao and Binh T. Nguyen and Nhat Ho and Paul Swoboda and Shadi Albarqouni and Pengtao Xie and Daniel Sonntag},
url = {https://arxiv.org/pdf/2212.01893.pdf https://www.dfki.de/fileadmin/user_upload/import/12923_Joint_Self-Supervised_Image-Volume_Representation_Learning.pdf}
}
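The 2D contrastive component such frameworks build on can be shown with an NT-Xent-style loss that pulls embeddings of two views of the same slice together; the paper's intra-inter clustering and deformable attention over 3D volumes are beyond this sketch.

# NT-Xent contrastive loss over two views of a batch (PyTorch sketch).
import torch, torch.nn.functional as F

def nt_xent(z1, z2, tau=0.5):
    """z1, z2: (B, D) embeddings of two augmented views of the same batch."""
    z = F.normalize(torch.cat([z1, z2]), dim=1)            # (2B, D)
    sim = z @ z.t() / tau
    sim.fill_diagonal_(float("-inf"))                      # exclude self-pairs
    b = z1.size(0)
    targets = torch.cat([torch.arange(b, 2 * b), torch.arange(0, b)])
    return F.cross_entropy(sim, targets)

z1, z2 = torch.randn(8, 32), torch.randn(8, 32)
print(float(nt_xent(z1, z2)))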
H. Tabrizchi, J. Razmara, and A. Mosavi, "Thermal prediction for energy management of clouds using a hybrid model based on CNN and stacking multi-layer bi-directional LSTM" Energy Reports, vol. 9.
2023.
doi: 10.1016/j.egyr.2023.01.032
@article{pub12997, abstract = {The fast advancement of technology and developers’ utilization of data centers have dramatically increased energy usage in today’s society. Thermal control is a key issue in hyper-scale cloud data centers. Hotspots form when the temperature of the host rises, increasing cooling costs and affecting dependability. Precise estimation of host temperatures is critical for optimal resource management. Thermal changes in the data center make estimating temperature a difficult challenge. Existing temperature estimating algorithms are ineffective due to their processing complexity as well as lack of accuracy. Regarding that data-driven approaches seem promising for temperature prediction, this research offers a unique efficient temperature prediction model. The model uses a combination of convolutional neural networks (CNN) and stacking multi-layer bi-directional long-term short memory (BiLSTM) for thermal prediction. The findings of the experiments reveal that the model successfully anticipates the temperature with the highest value of 97.15% and the lowest error rate of RMSE value of 0.2892, and an RMAE of 0.5003, which decreases the projection error as opposed to the other method.},
month = {12},
year = {2023},
title = {Thermal prediction for energy management of clouds using a hybrid model based on CNN and stacking multi-layer bi-directional LSTM},
journal = {Energy Reports},
volume = {9},
pages = {2253-2268},
publisher = {Elsevier},
doi = {https://doi.org/10.1016/j.egyr.2023.01.032},
author = {Hamed Tabrizchi and Jafar Razmara and Amirhosein Mosavi},
keywords = {Deep learning, Energy management, Cloud computing, Thermal predictio},
url = {https://www.researchgate.net/publication/367260355_Thermal_prediction_for_energy_management_of_clouds_using_a_hybrid_model_based_on_CNN_and_stacking_multi-layer_bi-directional_LSTM https://www.dfki.de/fileadmin/user_upload/import/12997_Thermal_prediction_for_energy_management_of_clouds_using_a_hybrid.pdf}
}
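The hybrid architecture named in the title maps onto a few lines of PyTorch: a 1D convolution over the input time series feeding stacked bidirectional LSTMs and a regression head. Layer sizes and the feature count below are illustrative, not the paper's configuration.

# CNN + stacked BiLSTM regressor for time-series temperature prediction.
import torch, torch.nn as nn

class CnnBiLstm(nn.Module):
    def __init__(self, n_features=4, hidden=32):
        super().__init__()
        self.conv = nn.Conv1d(n_features, 16, kernel_size=3, padding=1)
        self.lstm = nn.LSTM(16, hidden, num_layers=2,
                            bidirectional=True, batch_first=True)
        self.head = nn.Linear(2 * hidden, 1)

    def forward(self, x):                 # x: (batch, time, n_features)
        h = torch.relu(self.conv(x.transpose(1, 2))).transpose(1, 2)
        out, _ = self.lstm(h)
        return self.head(out[:, -1])      # predict next-step temperature

model = CnnBiLstm()
print(model(torch.randn(8, 30, 4)).shape)   # (8, 1)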
T. Hai, B. T. Sayed, A. Majdi, J. Zhou, R. Sagban, S. S. Band, and A. Mosavi, "An integrated GIS-based multivariate adaptive regression splines-cat swarm optimization for improving the accuracy of wildfire susceptibility mapping" Geocarto International, vol. 38, iss. 1, 2167005.
2023.
doi: 10.1080/10106049.2023.2167005
@article{pub13025, abstract = {A hybrid machine learning method is proposed for wildfire susceptibility mapping. For modeling a geographical information system (GIS) database including 11 influencing factors and 262 fire locations from 2013 to 2018 is used for developing an integrated multivariate adaptive regression splines (MARS). The cat swarm optimization (CSO) algorithm tunes the parameters of the MARS in order to generate accurate susceptibility maps. From the Pearson correlation results, it is observed that land use, temperature, and slope angle have strong correlation with the fire severity. The results demonstrate that the prediction capability of the MARS-CSO model outperforms model tree, reduced error pruning tree and MARS. The resulting wildfire risk map using MARS-CSO reveals that 20% of the study areas is categorized in the very low wildfire risk class, whereas 40% is under the very high class of fire hazard.},
number = {1, 2167005},
year = {2023},
title = {An integrated GIS-based multivariate adaptive regression splines-cat swarm optimization for improving the accuracy of wildfire susceptibility mapping},
journal = {Geocarto International},
volume = {38},
pages = {1-25},
publisher = {Taylor & Francis},
doi = {https://doi.org/10.1080/10106049.2023.2167005},
author = {Tao Hai and Biju Theruvil Sayed and Ali Majdi and Jincheng Zhou and Rafid Sagban and Shahab S. Band and Amirhosein Mosavi},
keywords = {Wildfire susceptibility, geospatial, machine learning, cat swarm optimization, artificial intelligence, natural hazard},
url = {https://www.tandfonline.com/doi/full/10.1080/10106049.2023.2167005 https://www.dfki.de/fileadmin/user_upload/import/13025_An_integrated_GIS-based_multivariate_adaptive_regressionsplines-cat_swarm_optimization_for_improving_theaccuracy_of_wildfire_susceptibility_mapping.pdf}
}
M. Gholami, E. Ghanbari-Adivi, M. Ehteram, V. P. Singh, A. N. Ahmed, A. Mosavi, and A. El-Shafie, "Predicting longitudinal dispersion coefficient using ensemble models and optimized multi-layer perceptron models" Ain Shams Engineering Journal (ASEJ), vol. 10.
2023.
doi: 10.1016/j.asej.2023.102223
@article{pub13154, abstract = {Prediction of the longitudinal dispersion coefficient (LDC) is essential for the river and water resources engineering and environmental management. This study proposes ensemble models for predicting LDC based on multilayer perceptron (MULP) methods and optimization algorithms. The honey badger optimization algorithm (HBOA), salp swarm algorithm (SASA), firefly algorithm (FIFA), and particle swarm optimization algorithm (PASOA) are used to adjust the MULP parameters. Then, the outputs of the MULP-HBOA, MULP-SASA, MULP-PASOA, MULP-FIFA, and MULP models were incorporated into an inclusive multiple model (IMM). For IMM at the testing level, the mean absolute error (MEAE) was 15, whereas it was 17, 18, 23, 24, and 25 for the MULP-HBOA, MULP-SASA, MULP-FIFA, MULP-PASOA, and MULP models. The study also modified the structure of MULP models using a goodness factor which decreased the CPU time. Removing redundant neurons reduces CPU time. Thus, the modified ANN model and the suggested IMM model can decrease the computational time and further improve the performance of models},
month = {4},
year = {2023},
title = {Predicting longitudinal dispersion coefficient using ensemble models and optimized multi-layer perceptron models},
journal = {Ain Shams Engineering Journal (ASEJ)},
volume = {10},
pages = {2253-2277},
publisher = {Elsevier},
doi = {https://doi.org/10.1016/j.asej.2023.102223},
author = {Mahsa Gholami and Elham Ghanbari-Adivi and Mohammad Ehteram and Vijay P. Singh and Ali Najah Ahmed and Amirhosein Mosavi and Ahmed El-Shafie},
keywords = {Longitudinal dispersion coefficient, Multilayer perceptron, Optimization, Artificial intelligence, Machine learning, Deep learning, Big data},
url = {https://www.sciencedirect.com/science/article/pii/S2090447923001120 https://www.dfki.de/fileadmin/user_upload/import/13154_Predicting_longitudinal_dispersion_coefficient_using_ensemble_models.pdf}
}
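The "inclusive multiple model" (IMM) reads as a stacking ensemble: the predictions of several differently-tuned MLPs become inputs to a combiner model. Below, distinct fixed hidden-layer sizes stand in for the metaheuristic tuning (HBOA, SASA, FIFA, PASOA), and the data is synthetic.

# Stacking-style IMM sketch with scikit-learn (illustrative).
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(0)
X = rng.normal(size=(300, 6))
y = X[:, 0] * 2.0 + np.sin(X[:, 1]) + rng.normal(0, 0.1, 300)

bases = [MLPRegressor(hidden_layer_sizes=h, max_iter=2000, random_state=0)
         for h in [(16,), (32,), (16, 16)]]
preds = np.column_stack([m.fit(X[:200], y[:200]).predict(X) for m in bases])

imm = LinearRegression().fit(preds[:200], y[:200])   # combiner model
err = np.abs(imm.predict(preds[200:]) - y[200:]).mean()
print(f"IMM mean absolute error: {err:.3f}")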
H. Mirhashemi, M. Heydari, O. Karami, K. Ahmadi, and A. Mosavi, "Modeling Climate Change Effects on the Distribution of Oak Forests with Machine Learning" Forests, vol. 14, iss. 3.
2023.
doi: 10.3390/f14030469
@article{pub13155, abstract = {The present study models the effect of climate change on the distribution of Persian oak (Quercus brantii Lindl.) in the Zagros forests, located in the west of Iran. The modeling is conducted under the current and future climatic conditions by fitting the machine learning method of the Bayesian additive regression tree (BART). For the anticipation of the potential habitats for the Persian oak, two general circulation models (GCMs) of CCSM4 and HADGEM2-ES under the representative concentration pathways (RCPs) of 2.6 and 8.5 for 2050 and 2070 are used. The mean temperature (MT) of the wettest quarter (bio8), solar radiation, slope and precipitation of the wettest month (bio13) are respectively reported as the most important variables in the modeling. The results indicate that the suitable habitat of Persian oak will significantly decrease in the future under both climate change scenarios as much as 75.06% by 2070. The proposed study brings insight into the current condition and further projects the future conditions of the local forests for proper management and protection of endangered ecosystems.},
number = {3},
year = {2023},
title = {Modeling Climate Change Effects on the Distribution of Oak Forests with Machine Learning},
journal = {Forests},
volume = {14},
pages = {13220-13233},
publisher = {MDPI},
doi = {https://doi.org/10.3390/f14030469},
author = {Hengameh Mirhashemi and Mehdi Heydari and Omid Karami and Kourosh Ahmadi and Amirhosein Mosavi},
keywords = {species distribution; climate change; Bayesian; machine learning; artificial intelligence; deep learning; mathematics; forest; big data; data science},
url = {https://www.mdpi.com/1999-4907/14/3/469 https://www.dfki.de/fileadmin/user_upload/import/13155_Modeling_Climate_Change_Effects_on_the_Distribution_of_Oak_Forests_with_Machine_Learning.pdf}
}
K. Altmeyer, M. Barz, L. Lauer, M. Peschel, D. Sonntag, R. Brünken, and S. Malone, "Digital ink and differentiated subjective ratings for cognitive load measurement in middle childhood" British Journal of Educational Psychology, vol. n/a, p. 18.
2023.
doi: 10.1111/bjep.12595
@article{pub13195, abstract = {Background: New methods are constantly being developed to adapt cognitive load measurement to different contexts. However, research on middle childhood students' cognitive load measurement is rare. Research indicates that the three cognitive load dimensions (intrinsic, extraneous, and germane) can be measured well in adults and teenagers using differentiated subjective rating instruments. Moreover, digital ink recorded by smartpens could serve as an indicator for cognitive load in adults. Aims: With the present research, we aimed at investigating the relation between subjective cognitive load ratings, velocity and pressure measures recorded with a smartpen, and performance in standardized sketching tasks in middle childhood students. Sample: Thirty-six children (age 7–12) participated at the university's laboratory. Methods: The children performed two standardized sketching tasks, each in two versions. The induced intrinsic cognitive load or the extraneous cognitive load was varied between the versions. Digital ink was recorded while the children drew with a smartpen on real paper and after each task, they were asked to report their perceived intrinsic and extraneous cognitive load using a newly developed 5-item scale. Results: Results indicated that cognitive load ratings as well as velocity and pressure measures were substantially related to the induced cognitive load and to performance in both sketching tasks. However, cognitive load ratings and smartpen measures were not substantially related. Conclusions: Both subjective rating and digital ink hold potential for cognitive load and performance measurement. However, it is questionable whether they measure the exact same constructs.},
year = {2023},
title = {Digital ink and differentiated subjective ratings for cognitive load measurement in middle childhood},
note = {e12595 BJEP.22.0172},
journal = {British Journal of Educational Psychology},
volume = {n/a},
pages = {18},
publisher = {John Wiley & Sons, Ltd},
doi = {https://doi.org/10.1111/bjep.12595},
author = {Kristin Altmeyer and Michael Barz and Luisa Lauer and Markus Peschel and Daniel Sonntag and Roland Brünken and Sarah Malone},
keywords = {assessment, cognitive load measurement, extraneous load, intrinsic load, primary school, smartpen},
url = {https://bpspsychub.onlinelibrary.wiley.com/doi/abs/10.1111/bjep.12595 https://www.dfki.de/fileadmin/user_upload/import/13195_Brit_J_of_Edu_Psychol_-_2023_-_Altmeyer_-_Digital_ink_and_differentiated_subjective_ratings_for_cognitive_load_measurement.pdf}
}
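Illustration: velocity and pressure features of the kind analysed above can be derived from raw pen samples as follows; the (x, y, pressure, timestamp) sample layout and the four summary statistics are assumptions for this sketch, not the study's instrument format.

import numpy as np

def stroke_features(samples):
    # samples: array of shape (n, 4) with columns x, y, pressure, t (seconds).
    dt = np.diff(samples[:, 3])
    dist = np.linalg.norm(np.diff(samples[:, :2], axis=0), axis=1)
    velocity = dist / np.maximum(dt, 1e-6)  # point-to-point writing speed
    return {"mean_velocity": float(velocity.mean()),
            "std_velocity": float(velocity.std()),
            "mean_pressure": float(samples[:, 2].mean()),
            "std_pressure": float(samples[:, 2].std())}

demo = np.array([[0.0, 0.0, 0.4, 0.00],
                 [1.0, 0.5, 0.5, 0.01],
                 [2.0, 1.5, 0.6, 0.02]])
print(stroke_features(demo))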
M. Barz, O. S. Bhatti, H. M. T. Alam, H. M. D. Nguyen, and D. Sonntag, "Interactive Fixation-to-AOI Mapping for Mobile Eye Tracking Data Based on Few-Shot Image Classification" in Proc. Companion Proceedings of the 28th International Conference on Intelligent User Interfaces. International Conference on Intelligent User Interfaces (IUI-2023), Sydney, NSW, Australia, 2023.
doi: 10.1145/3581754.3584179
@inproceedings{pub13196, series = {IUI '23 Companion},
abstract = {Mobile eye tracking is an important tool in psychology and human-centred interaction design for understanding how people process visual scenes and user interfaces. However, analysing recordings from mobile eye trackers, which typically include an egocentric video of the scene and a gaze signal, is a time-consuming and largely manual process. To address this challenge, we propose a web-based annotation tool that leverages few-shot image classification and interactive machine learning (IML) to accelerate the annotation process. The tool allows users to efficiently map fixations to areas of interest (AOI) in a video-editing-style interface. It includes an IML component that generates suggestions and learns from user feedback using a few-shot image classification model initialised with a small number of images per AOI. Our goal is to improve the efficiency and accuracy of fixation-to-AOI mapping in mobile eye tracking.},
year = {2023},
title = {Interactive Fixation-to-AOI Mapping for Mobile Eye Tracking Data Based on Few-Shot Image Classification},
booktitle = {Companion Proceedings of the 28th International Conference on Intelligent User Interfaces. International Conference on Intelligent User Interfaces (IUI-2023), Sydney, NSW, Australia},
pages = {175-178},
isbn = {9798400701078},
publisher = {Association for Computing Machinery},
doi = {https://doi.org/10.1145/3581754.3584179},
author = {Michael Barz and Omair Shahzad Bhatti and Hasan Md Tusfiqur Alam and Ho Minh Duy Nguyen and Daniel Sonntag},
keywords = {visual attention, interactive machine learning, eye tracking, area of interest, fixation to AOI mapping, mobile eye tracking, eye tracking data analysis},
url = {https://doi.org/10.1145/3581754.3584179 https://www.dfki.de/fileadmin/user_upload/import/13196_3581754.3584179.pdf}
}
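Illustration: the few-shot component can be approximated by a nearest-class-prototype rule over image embeddings, a common few-shot baseline; the toy two-dimensional embeddings and AOI names below are invented, and the paper's actual model may differ.

import numpy as np

def build_prototypes(embeddings_by_aoi):
    # One prototype per AOI: the mean of its few labelled example embeddings.
    return {aoi: np.mean(v, axis=0) for aoi, v in embeddings_by_aoi.items()}

def classify(embedding, prototypes):
    return min(prototypes, key=lambda a: np.linalg.norm(embedding - prototypes[a]))

support = {"poster": [np.array([1.0, 0.1]), np.array([0.9, 0.2])],
           "screen": [np.array([0.1, 1.0]), np.array([0.2, 0.8])]}
print(classify(np.array([0.95, 0.15]), build_prototypes(support)))  # "poster"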
M. A. Kadir, A. M. Selim, M. Barz, and D. Sonntag, "A User Interface for Explaining Machine Learning Model Explanations" in Proc. Companion Proceedings of the 28th International Conference on Intelligent User Interfaces. International Conference on Intelligent User Interfaces (IUI-2023), March 27-31, Sydney, NSW, Australia, 2023.
doi: 10.1145/3581754.3584131
@inproceedings{pub13200, series = {IUI'23 Companion},
abstract = {Explainable Artificial Intelligence (XAI) is an emerging subdiscipline of Machine Learning (ML) and human-computer interaction. Discriminative models need to be understood. An explanation of such ML models is vital when an AI system makes decisions that have significant consequences, such as in healthcare or finance. By providing an input-specific explanation, users can gain confidence in an AI system’s decisions and be more willing to trust and rely on it. One problem is that interpreting example-based explanations for discriminative models, such as saliency maps, can be difficult because it is not always clear how the highlighted features contribute to the model’s overall prediction or decisions. Moreover, saliency maps, which are state-of-the-art visual explanation methods, do not provide concrete information on the influence of particular features. We propose an interactive visualisation tool called EMILE-UI that allows users to evaluate the provided explanations of an image-based classification task, specifically those provided by saliency maps. This tool allows users to evaluate the accuracy of a saliency map by reflecting the true attention or focus of the corresponding model. It visualises the relationship between the ML model and its explanation of input images, making it easier to interpret saliency maps and understand how the ML model actually predicts. Our tool supports a wide range of deep learning image classification models and image data as inputs.},
month = {3},
year = {2023},
title = {A User Interface for Explaining Machine Learning Model Explanations},
booktitle = {Companion Proceedings of the 28th International Conference on Intelligent User Interfaces. International Conference on Intelligent User Interfaces (IUI-2023), March 27-31, Sydney, NSW, Australia},
volume = {Companion Proceedings of the 28th International Conference on Intelligent User Interfaces},
pages = {59-63},
isbn = {9798400701078},
publisher = {Association for Computing Machinery, New York, NY, United States},
doi = {https://doi.org/10.1145/3581754.3584131},
author = {Md Abdul Kadir and Abdulrahman Mohamed Selim and Michael Barz and Daniel Sonntag},
keywords = {Transparency, Explainability, ML, AI, Trustworthiness, Interpretability},
url = {https://doi.org/10.1145/3581754.3584131 https://www.dfki.de/fileadmin/user_upload/import/13200_iui23companion.pdf}
}
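Illustration: a plain gradient-based saliency map, the kind of explanation such a tool inspects, can be computed generically in PyTorch; this sketch uses a randomly initialised ResNet-18 purely to show the mechanics and is not EMILE-UI's implementation.

import torch
import torchvision.models as models

model = models.resnet18(weights=None).eval()  # random weights, demo only
image = torch.rand(1, 3, 224, 224, requires_grad=True)

score = model(image).max()  # logit of the highest-scoring class
score.backward()
# Per-pixel relevance: maximum absolute gradient across colour channels.
saliency = image.grad.abs().max(dim=1).values
print(saliency.shape)  # torch.Size([1, 224, 224])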
L. Kopácsi, M. Barz, O. S. Bhatti, and D. Sonntag, "IMETA: An Interactive Mobile Eye Tracking Annotation Method for Semi-Automatic Fixation-to-AOI Mapping" in Proc. Companion Proceedings of the 28th International Conference on Intelligent User Interfaces. International Conference on Intelligent User Interfaces (IUI-2023), March 27-31, Sydney, NSW, Australia, 2023.
doi: 10.1145/3581754.3584125
@inproceedings{pub13201, series = {IUI '23 Companion},
abstract = {Mobile eye tracking studies involve analyzing areas of interest (AOIs) and visual attention to these AOIs to understand how people process visual information. However, accurately annotating the data collected for user studies can be a challenging and time-consuming task. Current approaches for automatically or semi-automatically analyzing head-mounted eye tracking data in mobile eye tracking studies have limitations, including a lack of annotation flexibility or the inability to adapt to specific target domains. To address this problem, we present IMETA, an architecture for semi-automatic fixation-to-AOI mapping. When an annotator assigns an AOI label to a sequence of frames based on the respective fixation points, an interactive video object segmentation method is used to estimate the mask proposal of the AOI. Then, we use the 3D reconstruction of the visual scene created from the eye tracking video to map these AOI masks to 3D. The resulting 3D segmentation of the AOI can be used to suggest labels for the rest of the video, with the suggestions becoming increasingly accurate as more samples are provided by an annotator using interactive machine learning (IML). IMETA has the potential to reduce the annotation workload and speed up the evaluation of mobile eye tracking studies.},
year = {2023},
title = {IMETA: An Interactive Mobile Eye Tracking Annotation Method for Semi-Automatic Fixation-to-AOI Mapping},
booktitle = {Companion Proceedings of the 28th International Conference on Intelligent User Interfaces. International Conference on Intelligent User Interfaces (IUI-2023), March 27-31, Sydney, NSW, Australia},
pages = {33-36},
isbn = {9798400701078},
publisher = {Association for Computing Machinery},
doi = {https://doi.org/10.1145/3581754.3584125},
author = {László Kopácsi and Michael Barz and Omair Shahzad Bhatti and Daniel Sonntag},
keywords = {3D reconstruction, fixation to aoi mapping, areas of interest, video object segmentation, annotation, interactive machine learning, mobile eye tracking},
url = {https://doi.org/10.1145/3581754.3584125 https://www.dfki.de/fileadmin/user_upload/import/13201_3581754.3584125.pdf}
}
T. Gouvea, H. Kath, I. Troshani, B. Lüers, P. P. Serafini, I. B. Campos, A. S. Afonso, S. M. F. M. Leandro, L. Swanepoel, N. Theron, A. M. Swemmer, and D. Sonntag, "Interactive Machine Learning Solutions for Acoustic Monitoring of Animal Wildlife in Biosphere Reserves" in Proc. Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence. International Joint Conference on Artificial Intelligence (IJCAI-2023), located at IJCAI, August 19-25, Macao, Macao, 2023.
@inproceedings{pub13356, abstract = {Biodiversity loss is taking place at accelerated rates globally, and a business-as-usual trajectory will lead to missing internationally established conservation goals. Biosphere reserves are sites designed to be of global significance in terms of both the biodiversity within them and their potential for sustainable development, and are therefore ideal places for the development of local solutions to global challenges. While the protection of biodiversity is a primary goal of biosphere reserves, adequate information on the state and trends of biodiversity remains a critical gap for adaptive management in biosphere reserves. Passive acoustic monitoring (PAM) is an increasingly popular method for continued, reproducible, scalable, and cost-effective monitoring of animal wildlife. PAM adoption is on the rise, but its data management and analysis requirements pose a barrier for adoption for most agencies tasked with monitoring biodiversity. As an interdisciplinary team of machine learning scientists and ecologists experienced with PAM and working at biosphere reserves in marine and terrestrial ecosystems on three different continents, we report on the co-development of interactive machine learning tools for semi-automated assessment of animal wildlife.},
year = {2023},
title = {Interactive Machine Learning Solutions for Acoustic Monitoring of Animal Wildlife in Biosphere Reserves},
booktitle = {Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence. International Joint Conference on Artificial Intelligence (IJCAI-2023), located at IJCAI, August 19-25, Macao, Macao},
publisher = {International Joint Conferences on Artificial Intelligence},
author = {Thiago Gouvea and Hannes Kath and Ilira Troshani and Bengt Lüers and Patrícia P. Serafini and Ivan B. Campos and André S. Afonso and Sérgio M. F. M. Leandro and Lourens Swanepoel and Nicholas Theron and Anthony M. Swemmer and Daniel Sonntag},
url = {https://www.dfki.de/fileadmin/user_upload/import/13356_IJCAI_ProjectProposal_PAM_in_Biosphere_Reserves.pdf https://www.ijcai.org/proceedings/2023/711}
}
E. van Zoelen, T. Mioch, M. Tajaddini, C. Fleiner, S. Tsaneva, P. Camin, T. Gouvea, K. Baraka, M. H. T. de Boer, and M. A. Neerincx, "Developing Team Design Patterns for Hybrid Intelligence Systems" in Proc. Frontiers in Artificial Intelligence and Applications. International Conference on Hybrid Human-Artificial Intelligence (HHAI-2023), June 26-30, Munich, Germany, 2023.
doi: 10.3233/FAIA230071
@inproceedings{pub13357, abstract = {With artificial intelligence (AI) systems entering our working and leisure environments with increasing adaptation and learning capabilities, new opportunities arise for developing hybrid (human-AI) intelligence (HI) systems, comprising new ways of collaboration. However, there is not yet a structured way of specifying design solutions of collaboration for hybrid intelligence (HI) systems and there is a lack of best practices shared across application domains. We address this gap by investigating the generalization of specific design solutions into design patterns that can be shared and applied in different contexts. We present a human-centered bottom-up approach for the specification of design solutions and their abstraction into team design patterns. We apply the proposed approach for 4 concrete HI use cases and show the successful extraction of team design patterns that are generalizable, providing re-usable design components across various domains. This work advances previous research on team design patterns and designing applications of HI systems.},
year = {2023},
title = {Developing Team Design Patterns for Hybrid Intelligence Systems},
booktitle = {Frontiers in Artificial Intelligence and Applications. International Conference on Hybrid Human-Artificial Intelligence (HHAI-2023), June 26-30, Munich, Germany},
publisher = {IOS Press},
doi = {https://doi.org/10.3233/FAIA230071},
author = {Emma van Zoelen and Tina Mioch and Mani Tajaddini and Christian Fleiner and Stefani Tsaneva and Pietro Camin and Thiago Gouvea and Kim Baraka and Maaike H.T. de Boer and Mark A. Neerincx},
keywords = {Hybrid Intelligence, Team Design Patterns, Use-case based research, Human-centered AI, Co-evolution, Interdependence},
url = {https://ebooks.iospress.nl/doi/10.3233/FAIA230071 https://www.dfki.de/fileadmin/user_upload/import/13357_Van_Zoelen_et_al_2023_Developing_Team_Design_Patterns_for_Hybrid_Intelligence_Systems.pdf}
}
H. Kath, T. Gouvea, and D. Sonntag, "A Human-in-the-Loop Tool for Annotating Passive Acoustic Monitoring Datasets" in Proc. Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence. International Joint Conference on Artificial Intelligence (IJCAI-2023), located at IJCAI, August 19-25, Macao, China, 2023.
@inproceedings{pub13395, abstract = {Deep learning methods are well suited for data analysis in several domains, but application is often limited by technical entry barriers and the availability of large annotated datasets. We present an interactive machine learning tool for annotating passive acoustic monitoring datasets created for wildlife monitoring, which are time-consuming and costly to annotate manually. The tool, designed as a web application, consists of an interactive user interface implementing a human-in-the-loop workflow. Class label annotations provided manually as bounding boxes drawn over a spectrogram are consumed by a deep generative model (DGM) that learns a low-dimensional representation of the input data, as well as the available class labels. The learned low-dimensional representation is displayed as an interactive interface element, where new bounding boxes can be efficiently generated by the user with lasso-selection; alternatively, the DGM can propose new, automatically generated bounding boxes on demand. The user can accept, edit, or reject annotations suggested by the model, thus owning final judgement. Generated annotations can be used to fine-tune the underlying model, thus closing the loop. Investigations of the prediction accuracy and first empirical experiments show promising results on an artificial data set, laying the ground for application to a real life scenario.},
year = {2023},
title = {A Human-in-the-Loop Tool for Annotating Passive Acoustic Monitoring Datasets},
booktitle = {Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence. International Joint Conference on Artificial Intelligence (IJCAI-2023), located at IJCAI, August 19-25, Macao, China},
publisher = {International Joint Conferences on Artificial Intelligence},
author = {Hannes Kath and Thiago Gouvea and Daniel Sonntag},
url = {https://www.ijcai.org/proceedings/2023/835 https://www.dfki.de/fileadmin/user_upload/import/13395_Kath_et_al_2023_A_Human-in-the-Loop_Tool_for_Annotating_Passive_Acoustic_Monitoring_Datasets.pdf}
}
S. Liang, M. Hartmann, and D. Sonntag, "Cross-domain German Medical Named Entity Recognition using a Pre-Trained Language Model and Unified Medical Semantic Types" in Proc. Association for Computational Linguistics. Clinical Natural Language Processing Workshop (ClinicalNLP-2023), July 9-14, Toronto, ON, Canada, 2023.
@inproceedings{pub13402, abstract = {Information extraction from clinical text has the potential to facilitate clinical research and personalized clinical care, but annotating large amounts of data for each set of target tasks is prohibitive. We present a German medical Named Entity Recognition (NER) system capable of cross-domain knowledge transfer. The system builds on a pre-trained German language model and a token-level binary classifier, employing semantic types sourced from the Unified Medical Language System (UMLS) as entity labels to identify corresponding entity spans within the input text. To enhance the system’s performance and robustness, we pre-train it using a medical literature corpus that incorporates UMLS semantic term annotations. We evaluate the system’s effectiveness on two German annotated datasets obtained from different clinics in zero- and few-shot settings. The results show that our approach outperforms task-specific Conditional Random Fields (CRF) classifiers in terms of accuracy. Our work contributes to developing robust and transparent German medical NER models that can support the extraction of information from various clinical texts.},
year = {2023},
title = {Cross-domain German Medical Named Entity Recognition using a Pre-Trained Language Model and Unified Medical Semantic Types},
booktitle = {Association for Computational Linguistics. Clinical Natural Language Processing Workshop (ClinicalNLP-2023), July 9-14, Toronto, ON, Canada},
publisher = {ACL},
author = {Siting Liang and Mareike Hartmann and Daniel Sonntag},
url = {https://aclanthology.org/2023.clinicalnlp-1.31/ https://www.dfki.de/fileadmin/user_upload/import/13402_2023.clinicalnlp-1.31.pdf}
}
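Illustration: token-level tagging with a pre-trained German model via the Hugging Face transformers API; the checkpoint below is a generic stand-in (not the authors' model), and the paper's per-UMLS-semantic-type binary classification is simplified here to a single two-class head with a randomly initialised classifier layer.

import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification

checkpoint = "bert-base-german-cased"  # assumed stand-in checkpoint
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForTokenClassification.from_pretrained(checkpoint, num_labels=2)

inputs = tokenizer("Der Patient klagt über Kopfschmerzen.", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits  # (1, seq_len, 2): entity vs. non-entity
print(logits.argmax(-1))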
A. Anagnostopoulou, M. Hartmann, and D. Sonntag, "Towards Adaptable and Interactive Image Captioning with Data Augmentation and Episodic Memory" in Proc. Proceedings of The Fourth Workshop on Simple and Efficient Natural Language Processing (SustaiNLP). ACL Workshop on Simple and Efficient Natural Language Processing (SustaiNLP-2023), located at Annual Meeting of the Association for Computational Linguistics 2023, July 13, Toronto, Canada, 2023.
@inproceedings{pub13420, abstract = {Interactive machine learning (IML) is a beneficial learning paradigm in cases of limited data availability, as human feedback is incrementally integrated into the training process. In this paper, we present an IML pipeline for image captioning which allows us to incrementally adapt a pre-trained image captioning model to a new data distribution based on user input. In order to incorporate user input into the model, we explore the use of a combination of simple data augmentation methods to obtain larger data batches for each newly annotated data instance and implement continual learning methods to prevent catastrophic forgetting from repeated updates. For our experiments, we split a domain-specific image captioning dataset, namely VizWiz, into non-overlapping parts to simulate an incremental input flow for continually adapting the model to new data. We find that, while data augmentation worsens results, even when relatively small amounts of data are available, episodic memory is an effective strategy to retain knowledge from previously seen clusters.},
month = {7},
year = {2023},
title = {Towards Adaptable and Interactive Image Captioning with Data Augmentation and Episodic Memory},
booktitle = {Proceedings of The Fourth Workshop on Simple and Efficient Natural Language Processing (SustaiNLP). ACL Workshop on Simple and Efficient Natural Language Processing (SustaiNLP-2023), located at Annual Meeting of the Association for Computational Linguistics 2023, July 13, Toronto, Canada},
publisher = {Association for Computational Linguistics},
author = {Aliki Anagnostopoulou and Mareike Hartmann and Daniel Sonntag},
url = {https://aclanthology.org/2023.sustainlp-1.19/ https://www.dfki.de/fileadmin/user_upload/import/13420__SustaiNLP__Interactive_Image_Captioning.pdf}
}
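Illustration: the episodic memory strategy can be sketched as a small replay buffer whose samples are mixed into every incremental update; capacity, eviction policy, and the replay ratio are illustrative assumptions, and the actual gradient step is left as a placeholder.

import random

class EpisodicMemory:
    def __init__(self, capacity=1000):
        self.buffer, self.capacity = [], capacity

    def add(self, example):
        if len(self.buffer) >= self.capacity:
            self.buffer.pop(random.randrange(len(self.buffer)))  # random eviction
        self.buffer.append(example)

    def sample(self, k):
        return random.sample(self.buffer, min(k, len(self.buffer)))

memory = EpisodicMemory()

def update_model(model, new_examples, replay_ratio=4):
    # Mix new annotations with replayed old ones to limit forgetting.
    batch = list(new_examples) + memory.sample(replay_ratio * len(new_examples))
    # model.train_step(batch)  # placeholder for one fine-tuning step
    for ex in new_examples:
        memory.add(ex)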
C. A. Johns, M. Barz, and D. Sonntag, "Interactive Link Prediction as a Downstream Task for Foundational GUI Understanding Models" in Proc. KI 2023: Advances in Artificial Intelligence. German Conference on Artificial Intelligence (KI-2023), Berlin, Germany, 2023.
doi: 10.1007/978-3-031-42608-7_7
@inproceedings{pub13988, abstract = {AI models that can recognize and understand the semantics of graphical user interfaces (GUIs) enable a variety of use cases ranging from accessibility to automation. Recent efforts in this domain have pursued the development of a set of foundation models: generic GUI understanding models that can be used off-the-shelf to solve a variety of GUI-related tasks, including ones that they were not trained on. In order to develop such foundation models, meaningful downstream tasks and baselines for GUI-related use cases will be required. In this paper, we present interactive link prediction as a downstream task for GUI understanding models and provide baselines as well as testing tools to effectively and efficiently evaluate predictive GUI understanding models. In interactive link prediction, the task is to predict whether tapping on an element on one screen of a mobile application (source element) navigates the user to a second screen (target screen). If this task is solved sufficiently, it can demonstrate an understanding of the relationship between elements and components across screens and enable various applications in GUI design automation and assistance. To encourage and support research on interactive link prediction, this paper contributes (1) a pre-processed large-scale dataset of links in mobile applications (18,830 links from 5,362 applications) derived from the popular RICO dataset, (2) performance baselines from five heuristic-based and two learning-based GUI understanding models, (3) a small-scale dataset of links in mobile GUI prototypes including ratings from an online study with 36 end-users for out-of-sample testing, and (4) a Figma plugin that can leverage link prediction models to automate and assist mobile GUI prototyping.},
month = {9},
year = {2023},
title = {Interactive Link Prediction as a Downstream Task for Foundational GUI Understanding Models},
booktitle = {KI 2023: Advances in Artificial Intelligence. German Conference on Artificial Intelligence (KI-2023), Berlin, Germany},
editor = {Dietmar Seipel and Alexander Steen},
pages = {75-89},
isbn = {978-3-031-42608-7},
publisher = {Springer Nature Switzerland, Cham},
doi = {https://doi.org/10.1007/978-3-031-42608-7_7},
author = {Christoph Albert Johns and Michael Barz and Daniel Sonntag},
url = {https://www.dfki.de/fileadmin/user_upload/import/13988_KI__23___Link_Prediction_as_a_Downstream_Task_in_GUI_Understanding_(2).pdf https://link.springer.com/chapter/10.1007/978-3-031-42608-7_7}
}
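Illustration: a heuristic link-prediction baseline could look roughly like the rule below; it is a hypothetical example for intuition only and does not reproduce any of the paper's five heuristics.

def predict_link(element):
    # element: GUI metadata, e.g. {"class": "button", "text": "Open Settings"}.
    nav_words = {"next", "back", "open", "menu", "settings", "login"}
    if element.get("class") in {"button", "list_item", "toolbar_icon"}:
        return any(w in element.get("text", "").lower() for w in nav_words)
    return False

print(predict_link({"class": "button", "text": "Open Settings"}))  # True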
K. Kuznetsov, M. Barz, and D. Sonntag, "Detection of Contract Cheating in Pen-and-Paper Exams through the Analysis of Handwriting Style" in Proc. Companion Publication of the 25th International Conference on Multimodal Interaction. ACM International Conference on Multimodal Interaction (ICMI-2023), October 9-13, Paris, France, 2023.
doi: 10.1145/3610661.3617162
@inproceedings{pub14164, series = {ICMI '23 Companion},
abstract = {Contract cheating, i.e., when a student employs another person to participate in an exam, appears to become a growing problem in academia. Cases of paid test takers are repeatedly reported in the media, but the number of unreported cases is unclear. Proctoring systems as a countermeasure are typically not appreciated by students and teachers because they may violate the students' privacy and can be imprecise and nontransparent. In this work, we propose to use automatic handwriting analysis based on digital ballpoint pens to identify individuals during exams unobtrusively. We implement a system that enables continuous authentication of the user during exams. We use a deep neural network architecture to model a user's handwriting style. An evaluation based on the large Deepwriting dataset shows that our system can successfully differentiate between the handwriting styles of different authors and hence detect simulated cases of contract cheating. In addition, we conducted a small validation study using digital ballpoint pens to assess the system's reliability in a more realistic environment.},
year = {2023},
title = {Detection of Contract Cheating in Pen-and-Paper Exams through the Analysis of Handwriting Style},
booktitle = {Companion Publication of the 25th International Conference on Multimodal Interaction. ACM International Conference on Multimodal Interaction (ICMI-2023), October 9-13, Paris, France},
pages = {26-30},
isbn = {9798400703218},
publisher = {Association for Computing Machinery, New York, NY, USA},
doi = {https://doi.org/10.1145/3610661.3617162},
author = {Konstantin Kuznetsov and Michael Barz and Daniel Sonntag},
keywords = {Digital Pens, Contract Cheating, Writer Identification, Proctoring Solutions},
url = {https://doi.org/10.1145/3610661.3617162 https://www.dfki.de/fileadmin/user_upload/import/14164_ICMI23_contract_cheating.pdf}
}
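Illustration: at its core, continuous writer authentication reduces to a similarity test between a handwriting-style embedding and an enrolled profile; the embedding network is assumed, and the cosine threshold and vectors below are invented for the demo.

import numpy as np

def cosine(a, b):
    return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))

def same_writer(embedding, enrolled, threshold=0.8):
    # Accept the writer if the style embeddings are sufficiently similar.
    return cosine(embedding, enrolled) >= threshold

enrolled = np.array([0.9, 0.1, 0.4])
incoming = np.array([0.85, 0.15, 0.38])
print(same_writer(incoming, enrolled))  # True -> no cheating alarm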
H. M. D. Nguyen, H. Nguyen, N. T. Diep, T. Pham, T. Cao, B. T. Nguyen, P. Swoboda, N. Ho, S. Albarqouni, P. Xie, D. Sonntag, and M. Niepert, "LVM-Med: Learning Large-Scale Self-Supervised Vision Models for Medical Imaging via Second-order Graph Matching" in Proc. The Thirty-Seventh Annual Conference on Neural Information Processing Systems (NeurIPS 2023). Neural Information Processing Systems (NeurIPS), December 10-16, USA, 2023.
@inproceedings{pub14309, abstract = {Obtaining large pre-trained models that can be fine-tuned to new tasks with limited annotated samples has remained an open challenge for medical imaging data. While pre-trained deep networks on ImageNet and vision-language foundation models trained on web-scale data are prevailing approaches, their effectiveness on medical tasks is limited due to the significant domain shift between natural and medical images. To bridge this gap, we introduce LVM-Med, the first family of deep networks trained on large-scale medical datasets. We have collected approximately 1.3 million medical images from 55 publicly available datasets, covering a large number of organs and modalities such as CT, MRI, X-ray, and Ultrasound. We benchmark several state-of-the-art self-supervised algorithms on this dataset and propose a novel self-supervised contrastive learning algorithm using a graph-matching formulation. The proposed approach makes three contributions: (i) it integrates prior pair-wise image similarity metrics based on local and global information; (ii) it captures the structural constraints of feature embeddings through a loss function constructed via a combinatorial graph-matching objective; and (iii) it can be trained efficiently end-to-end using modern gradient-estimation techniques for black-box solvers. We thoroughly evaluate the proposed LVM-Med on 15 downstream medical tasks ranging from segmentation and classification to object detection, both in in-distribution and out-of-distribution settings. LVM-Med empirically outperforms a number of state-of-the-art supervised, self-supervised, and foundation models. For challenging tasks such as Brain Tumor Classification or Diabetic Retinopathy Grading, LVM-Med improves previous vision-language models trained on 1 billion masks by 6-7% while using only a ResNet-50.},
month = {12},
year = {2023},
title = {LVM-Med: Learning Large-Scale Self-Supervised Vision Models for Medical Imaging via Second-order Graph Matching},
booktitle = {The Thirty-Seventh Annual Conference on Neural Information Processing Systems (NeurIPS 2023). Neural Information Processing Systems (NeurIPS), December 10-16, USA},
publisher = {Advances in Neural Information Processing Systems},
author = {Ho Minh Duy Nguyen and Hoang Nguyen and Nghiem T. Diep and Tan Pham and Tri Cao and Binh T. Nguyen and Paul Swoboda and Nhat Ho and Shadi Albarqouni and Pengtao Xie and Daniel Sonntag and Mathias Niepert},
url = {https://www.dfki.de/fileadmin/user_upload/import/14309_LVM-Med_Camera_Version_2.pdf https://arxiv.org/abs/2306.11925}
}
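Background: the generic contrastive starting point that LVM-Med extends with second-order graph matching can be written as an InfoNCE-style loss; the sketch below is that standard loss, not the paper's full objective.

import torch
import torch.nn.functional as F

def info_nce(z1, z2, temperature=0.1):
    # z1, z2: (n, d) embeddings of two augmented views of the same n images.
    z1, z2 = F.normalize(z1, dim=1), F.normalize(z2, dim=1)
    logits = z1 @ z2.t() / temperature  # pairwise cosine similarities
    targets = torch.arange(z1.size(0))  # matching pairs sit on the diagonal
    return F.cross_entropy(logits, targets)

print(info_nce(torch.rand(4, 8), torch.rand(4, 8)))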
H. M. D. Nguyen, T. Pham, N. T. Diep, N. Phan, Q. Pham, V. Tong, B. T. Nguyen, N. H. Le, N. Ho, P. Xie, D. Sonntag, and M. Niepert, "On the Out of Distribution Robustness of Foundation Models in Medical Image Segmentation" in Proc. The Thirty-Seventh Annual Conference on Neural Information Processing Systems (NeurIPS 2023). Neural Information Processing Systems (NeurIPS), Workshop on Robustness of Few-shot and Zero-shot Learning in Foundation Models, December 10-16, 2023.
@inproceedings{pub14499, abstract = {Constructing a robust model that can effectively generalize to test samples under distribution shifts remains a significant challenge in the field of medical imaging. Foundational models for vision and language, pre-trained on extensive sets of natural image and text data, have emerged as a promising approach. They showcase impressive learning abilities across different tasks while requiring only a limited number of annotated samples. While numerous techniques have focused on developing better fine-tuning strategies to adapt these models for specific domains, we instead examine their robustness to domain shifts in the medical image segmentation task. To this end, we compare the generalization performance to unseen domains of various pre-trained models after being fine-tuned on the same in-distribution dataset and show that foundation-based models enjoy better robustness than other architectures. From here, we further developed a new Bayesian uncertainty estimation for frozen models and used it as an indicator to characterize the model’s performance on out-of-distribution (OOD) data, proving particularly beneficial for real-world applications. Our experiments not only reveal the limitations of current indicators like accuracy on the line or agreement on the line commonly used in natural image applications but also emphasize the promise of the introduced Bayesian uncertainty. Specifically, lower uncertainty predictions usually correspond to higher out-of-distribution (OOD) performance.},
month = {12},
year = {2023},
title = {On the Out of Distribution Robustness of Foundation Models in Medical Image Segmentation},
booktitle = {The Thirty-Seventh Annual Conference on Neural Information Processing Systems (NeurIPS 2023). Neural Information Processing Systems (NeurIPS), Workshop on Robustness of Few-shot and Zero-shot Learning in Foundation Models, December 10-16},
publisher = {Advances in Neural Information Processing Systems},
author = {Ho Minh Duy Nguyen and Tan Pham and Nghiem Tuong Diep and Nghi Phan and Quang Pham and Vinh Tong and Binh T. Nguyen and Ngan Hoang Le and Nhat Ho and Pengtao Xie and Daniel Sonntag and Mathias Niepert},
keywords = {Foundation Models, Uncertainty Estimation, Robustness},
url = {https://www.dfki.de/fileadmin/user_upload/import/14499_52_on_the_out_of_distribution_rob.pdf https://arxiv.org/abs/2311.11096}
}
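Illustration: one common way to obtain such uncertainty estimates for a frozen backbone is Monte Carlo dropout over a small prediction head; whether this matches the paper's estimator is an assumption, so treat the sketch as a generic stand-in.

import torch
import torch.nn as nn

head = nn.Sequential(nn.Linear(16, 32), nn.ReLU(), nn.Dropout(0.5), nn.Linear(32, 2))
head.train()  # keep dropout stochastic at inference time

features = torch.rand(8, 16)  # frozen-encoder features for 8 inputs
samples = torch.stack([head(features).softmax(-1) for _ in range(30)])
uncertainty = samples.var(dim=0).sum(-1)  # predictive variance per input
print(uncertainty)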
L. Lauer, H. Javaheri, K. Altmeyer, S. Malone, A. Grünerbl, M. Barz, M. Peschel, R. Brünken, and P. Lukowicz, "Encountering Students' Learning Difficulties in Electrics - Didactical Concept and Prototype of Augmented Reality-Toolkit" in Proc. Fostering scientific citizenship in an uncertain world - ESERA 2021 e-Proceedings. European Science Education Research Association Conference (ESERA-2021), Workshop: Digital Resources for Science Teaching and Learning, August 30 - September 3, 2022.
@inproceedings{pub12121, abstract = {Real-time visualization of electrical circuit schematics in accordance with the components’ semantic connection • Use of the toolkit may facilitate the acquisition of representational competencies (concerning the matching of components and symbols and the matching of circuits and circuit schematics) • Usable with either handheld AR devices or head-mounted AR devices},
year = {2022},
title = {Encountering Students' Learning Difficulties in Electrics - Didactical Concept and Prototype of Augmented Reality-Toolkit},
booktitle = {Fostering scientific citizenship in an uncertain world - ESERA 2021 e-Proceedings. European Science Education Research Association Conference (ESERA-2021), Workshop: Digital Resources for Science Teaching and Learning, August 30 - September 3},
publisher = {University of Minho},
author = {Luisa Lauer and Hamraz Javaheri and Kristin Altmeyer and Sarah Malone and Agnes Grünerbl and Michael Barz and Markus Peschel and Roland Brünken and Paul Lukowicz},
organization = {University of Minho},
url = {https://www.markus-peschel.de/files_neu/publikationen/Workshop_ESERA2021-komprimiert.pdf https://www.dfki.de/fileadmin/user_upload/import/12121_2022_Encountering_Students'_Learning_Difficulties_in_Electrics_-_Didactical_Concept_and_Prototype_of_Augmented_Reality-Toolkit.pdf}
}
M. Barz, O. S. Bhatti, and D. Sonntag, "Implicit Estimation of Paragraph Relevance from Eye Movements" Frontiers in Computer Science, vol. 3, p. 13.
2022.
doi: 10.3389/fcomp.2021.808507
@article{pub12165, abstract = {Eye movements were shown to be an effective source of implicit relevance feedback in constrained search and decision-making tasks. Recent research suggests that gaze-based features, extracted from scanpaths over short news articles (g-REL), can reveal the perceived relevance of read text with respect to a previously shown trigger question. In this work, we aim to confirm this finding and we investigate whether it generalizes to multi-paragraph documents from Wikipedia (Google Natural Questions) that require readers to scroll down to read the whole text. We conduct a user study (n=24) in which participants read single- and multi-paragraph articles and rate their relevance at the paragraph level with respect to a trigger question. We model the perceived document relevance using machine learning and features from the literature as input. Our results confirm that eye movements can be used to effectively model the relevance of short news articles, in particular if we exclude difficult cases: documents which are on topic of the trigger questions but irrelevant. However, our results do not clearly show that the modeling approach generalizes to multi-paragraph document settings. We publish our dataset and our code for feature extraction under an open source license to enable future research in the field of gaze-based implicit relevance feedback.},
month = {1},
year = {2022},
title = {Implicit Estimation of Paragraph Relevance from Eye Movements},
journal = {Frontiers in Computer Science},
volume = {3},
pages = {13},
publisher = {Frontiers Media S.A.},
doi = {https://doi.org/10.3389/fcomp.2021.808507},
author = {Michael Barz and Omair Shahzad Bhatti and Daniel Sonntag},
keywords = {Implicit relevance feedback, Reading analysis, machine learning, eye tracking, Perceived paragraph relevance, Eye movements and reading},
url = {https://www.frontiersin.org/articles/10.3389/fcomp.2021.808507 https://www.dfki.de/fileadmin/user_upload/import/12165_fcomp-03-808507.pdf}
}
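Illustration: gaze-based relevance models start from simple per-paragraph reading features such as the ones below; the three statistics are generic examples, while the authors' actual feature extraction code is published under an open source license.

import numpy as np

def gaze_features(fixation_durations_ms):
    # Durations of all fixations that landed on one paragraph.
    d = np.asarray(fixation_durations_ms, dtype=float)
    return {"n_fixations": int(d.size),
            "total_dwell_ms": float(d.sum()),
            "mean_fixation_ms": float(d.mean()) if d.size else 0.0}

print(gaze_features([180, 220, 140]))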
M. Hartmann, A. Anagnostopoulou, and D. Sonntag, Interactive Machine Learning for Image Captioning.
@misc{pub12167, abstract = {We propose an approach for interactive learning for an image captioning model. As human feedback is expensive and modern neural network based approaches often require large amounts of supervised data to be trained, we envision a system that exploits human feedback as well as possible by multiplying the feedback using data augmentation methods, and integrating the resulting training examples into the model in a smart way. This approach has three key components, for which we need to find suitable practical implementations: feedback collection, data augmentation, and model update. We outline our idea and review different possibilities to address these tasks.},
month = {2},
year = {2022},
title = {Interactive Machine Learning for Image Captioning},
howpublished = {The AAAI-22 Workshop on Interactive Machine Learning},
author = {Mareike Hartmann and Aliki Anagnostopoulou and Daniel Sonntag},
keywords = {learning from feedback, image captioning, data augmentation},
url = {https://www.dfki.de/fileadmin/user_upload/import/12167_interactive_learning_for_image_captioning.pdf}
}
H. M. D. Nguyen, T. T. Nguyen, H. Vu, Q. Pham, M. Nguyen, B. T. Nguyen, and D. Sonntag, "TATL: Task Agnostic Transfer Learning for Skin Attributes Detection" Medical Image Analysis, vol. 01.
2022.
@article{pub12216, abstract = {Existing skin attributes detection methods usually initialize with a pre-trained Imagenet network and then fine-tune on a medical target task. However, we argue that such approaches are suboptimal because medical datasets are largely different from ImageNet and often contain limited training samples. In this work, we propose Task Agnostic Transfer Learning (TATL), a novel framework motivated by dermatologists' behaviors in the skincare context. TATL learns an attribute-agnostic segmenter that detects lesion skin regions and then transfers this knowledge to a set of attribute-specific classifiers to detect each particular attribute. Since TATL's attribute-agnostic segmenter only detects skin attribute regions, it enjoys ample data from all attributes, allows transferring knowledge among features, and compensates for the lack of training data from rare attributes. We conduct extensive experiments to evaluate the proposed TATL transfer learning mechanism with various neural network architectures on two popular skin attributes detection benchmarks. The empirical results show that TATL not only works well with multiple architectures but also can achieve state-of-the-art performances, while enjoying minimal model and computational complexities. We also provide theoretical insights and explanations for why our transfer learning framework performs well in practice.},
year = {2022},
title = {TATL: Task Agnostic Transfer Learning for Skin Attributes Detection},
journal = {Medical Image Analysis},
volume = {01},
pages = {1-27},
publisher = {Elsevier},
author = {Ho Minh Duy Nguyen and Thu T. Nguyen and Huong Vu and Quang Pham and Manh-Duy Nguyen and Binh T. Nguyen and Daniel Sonntag},
url = {https://arxiv.org/pdf/2104.01641.pdf https://www.dfki.de/fileadmin/user_upload/import/12216_TATL_Task_Agnostic_Transfer_Learning_for_Skin_Attributes_Detection.pdf}
}
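Illustration: the transfer step at the heart of TATL amounts to initialising each attribute-specific model from the trained attribute-agnostic encoder; the tiny convolutional encoder below is a placeholder, not the paper's architecture.

import torch.nn as nn

def make_encoder():
    return nn.Sequential(nn.Conv2d(3, 8, 3, padding=1), nn.ReLU(),
                         nn.Conv2d(8, 16, 3, padding=1), nn.ReLU())

agnostic_segmenter = nn.Sequential(make_encoder(), nn.Conv2d(16, 1, 1))
# ... train agnostic_segmenter on lesion regions from all attributes ...

attribute_model = nn.Sequential(make_encoder(), nn.Conv2d(16, 1, 1))
attribute_model[0].load_state_dict(agnostic_segmenter[0].state_dict())  # transfer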
T. Ott, P. Masset, T. Gouvea, and A. Kepecs, "Apparent sunk cost effect in rational agents" Science Advances, vol. 8, iss. 6.
2022.
doi: 10.1126/sciadv.abi7004
@article{pub12243, abstract = {Rational decision makers aim to maximize their gains, but humans and other animals often fail to do so, exhibiting biases and distortions in their choice behavior. In a recent study of economic decisions, humans, mice, and rats were reported to succumb to the sunk cost fallacy, making decisions based on irrecoverable past investments to the detriment of expected future returns. We challenge this interpretation because it is subject to a statistical fallacy, a form of attrition bias, and the observed behavior can be explained without invoking a sunk cost–dependent mechanism. Using a computational model, we illustrate how a rational decision maker with a reward-maximizing decision strategy reproduces the reported behavioral pattern and propose an improved task design to dissociate sunk costs from fluctuations in decision valuation. Similar statistical confounds may be common in analyses of cognitive behaviors, highlighting the need to use causal statistical inference and generative models for interpretation.},
number = {6},
month = {2},
year = {2022},
title = {Apparent sunk cost effect in rational agents},
journal = {Science Advances},
volume = {8},
pages = {1-10},
publisher = {American Association for the Advancement of Science},
doi = {https://doi.org/10.1126/sciadv.abi7004},
author = {Torben Ott and Paul Masset and Thiago Gouvea and Adam Kepecs},
url = {https://www.science.org/doi/10.1126/sciadv.abi7004 https://www.dfki.de/fileadmin/user_upload/import/12243_Apparent_sunk_cost_effect_in_rational_agents.pdf}
}
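Illustration: the attrition-bias argument can be reproduced in a toy simulation. The agent below has a fluctuating valuation, a fixed quitting threshold, and no sunk-cost term anywhere, yet once trials are conditioned on not having quit, the fraction that persists to the end grows with time already invested. All parameters are illustrative.

import numpy as np

rng = np.random.default_rng(1)
n_trials, horizon = 10000, 20
stayed_until = np.zeros(n_trials, dtype=int)
for i in range(n_trials):
    value = rng.normal()  # initial valuation of waiting for the reward
    for t in range(horizon):
        value += rng.normal(scale=0.3)  # valuation drifts over waiting time
        if value < -1.0:  # quit once valuation drops below threshold
            break
    stayed_until[i] = t

# Conditioning on "still waiting at time t" selects high-valuation trials,
# producing an apparent sunk cost effect without any sunk-cost mechanism.
for t in (2, 8, 14):
    print(t, np.mean(stayed_until[stayed_until >= t] >= horizon - 1))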
H. M. D. Nguyen, R. Henschel, B. Rosenhahn, D. Sonntag, and P. Swoboda, "LMGP: Lifted Multicut Meets Geometry Projections for Multi-Camera Multi-Object Tracking" in Proc. Conference on Computer Vision and Pattern Recognition (CVPR) 2022. International Conference on Computer Vision and Pattern Recognition (CVPR), June 21-24, 2022.
@inproceedings{pub12286, abstract = {Multi-Camera Multi-Object Tracking is currently drawing attention in the computer vision field due to its superior performance in real-world applications such as video surveillance in crowded scenes or vast spaces. In this work, we propose a mathematically elegant multi-camera multiple object tracking approach based on a spatial-temporal lifted multicut formulation. Our model utilizes state-of-the-art tracklets produced by single-camera trackers as proposals. As these tracklets may contain ID-Switch errors, we refine them through a novel pre-clustering obtained from 3D geometry projections. As a result, we derive a better tracking graph without ID switches and more precise affinity costs for the data association phase. Tracklets are then matched to multi-camera trajectories by solving a global lifted multicut formulation that incorporates short and long-range temporal interactions on tracklets located in the same camera as well as inter-camera ones. Experimental results on the WildTrack dataset yield near-perfect results, outperforming state-of-the-art trackers on Campus while being on par on the PETS-09 dataset. We will make our implementations available upon acceptance of the paper.},
year = {2022},
title = {LMGP: Lifted Multicut Meets Geometry Projections for Multi-Camera Multi-Object Tracking},
booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR) 2022. International Conference on Computer Vision and Pattern Recognition (CVPR), June 21-24},
publisher = {IEEE/CVF},
author = {Ho Minh Duy Nguyen and Roberto Henschel and Bodo Rosenhahn and Daniel Sonntag and Paul Swoboda},
url = {https://arxiv.org/pdf/2111.11892.pdf https://www.dfki.de/fileadmin/user_upload/import/12286_LMGP_Lifted_Multicut_Meets_Geometry_Projections_for_Multi-Camera.pdf}
}
P. Valdunciel, O. S. Bhatti, M. Barz, and D. Sonntag, "Interactive Assessment Tool for Gaze-based Machine Learning Models in Information Retrieval" in Proc. ACM SIGIR Conference on Human Information Interaction and Retrieval. ACM SIGIR Conference on Human Information Interaction and Retrieval (CHIIR-2022), March 14-18, Regensburg, Germany, 2022.
doi: 10.1145/3498366.3505834
@inproceedings{pub12287, abstract = {Eye movements were shown to be an effective source of implicit relevance feedback in information retrieval tasks. They can be used to, e.g., estimate the relevance of read documents and expand search queries using machine learning. In this paper, we present the Reading Model Assessment tool (ReMA), an interactive tool for assessing gaze-based relevance estimation models. Our tool allows experimenters to easily browse recorded trials, compare the model output to a ground truth, and visualize gaze-based features at the token- and paragraph-level that serve as model input. Our goal is to facilitate the understanding of the relation between eye movements and the human relevance estimation process, to understand the strengths and weaknesses of a model at hand, and, eventually, to enable researchers to build more effective models.},
month = {3},
year = {2022},
title = {Interactive Assessment Tool for Gaze-based Machine Learning Models in Information Retrieval},
booktitle = {ACM SIGIR Conference on Human Information Interaction and Retrieval. ACM SIGIR Conference on Human Information Interaction and Retrieval (CHIIR-2022), March 14-18, Regensburg, Germany},
isbn = {9781450391863},
publisher = {Association for Computing Machinery},
doi = {https://doi.org/10.1145/3498366.3505834},
author = {Pablo Valdunciel and Omair Shahzad Bhatti and Michael Barz and Daniel Sonntag},
keywords = {eye tracking, relevance estimation, information retrieval, reading, data visualization, interactive model assessment},
url = {https://www.dfki.de/fileadmin/user_upload/import/12287_3498366.3505834.pdf https://www.researchgate.net/publication/359219435_Interactive_Assessment_Tool_for_Gaze-based_Machine_Learning_Models_in_Information_Retrieval}
}
T. Gouvea, I. Troshani, M. Herrlich, and D. Sonntag, "Annotating sound events through interactive design of interpretable features" in Proc. Proceedings of the First International Conference on Hybrid Human-Machine Intelligence. International Conference on Hybrid Human-Artificial Intelligence (HHAI-2022), June 13-17, Amsterdam, Netherlands, 2022.
@inproceedings{pub12428, series = {Frontiers of AI},
abstract = {Professionals of all domains of expertise expect to take part in the benefits of the machine learning (ML) revolution, but realisation is often slowed down by lack of training in ML concepts and tools, as well as low availability of annotated data for supervised methods. Inspired by the problem of assessing the impact of human-generated activity on marine ecosystems through passive acoustic monitoring (PAM), we are developing Seadash, an interactive tool for event detection and classification in multivariate time series.},
year = {2022},
title = {Annotating sound events through interactive design of interpretable features},
booktitle = {Proceedings of the First International Conference on Hybrid Human-Machine Intelligence. International Conference on Hybrid Human-Artificial Intelligence (HHAI-2022), June 13-17, Amsterdam, Netherlands},
publisher = {IOS Press},
author = {Thiago Gouvea and Ilira Troshani and Marc Herrlich and Daniel Sonntag},
keywords = {data analysis, interactive machine learning, data visualization, data mining, representation learning},
url = {https://www.dfki.de/fileadmin/user_upload/import/12428_Gouvea_et_al_2022_Annotating_Sound_Events_Through_Interactive_Design_of_Interpretable_Features.pdf https://ebooks.iospress.nl/doi/10.3233/FAIA220225}
}
T. S. Gouvêa, I. Troshani, M. Herrlich, and D. Sonntag, "Interactive design of interpretable features for marine soundscape data annotation" in Proc. Workshop on Human-centered Design of Symbiotic Hybrid Intelligence. Workshop on Human-centered Design of Symbiotic Hybrid Intelligence (HCSHI-2022), located at HHAI, June 14, Amsterdam, Netherlands, 2022.
@inproceedings{pub12429, abstract = {Machine learning (ML) is increasingly used in different application domains. However, to reach its full potential it is important that experts without extensive ML training be able to create and effectively apply models in their domain. This requires forms of co-learning that need to be facilitated by effective interfaces and interaction paradigms. Inspired by the problem of detecting and classifying sound events in marine soundscapes, we are developing Seadash. Through a rapid, iterative data exploration workflow, the user designs and curates features that capture meaningful structure in the data, and uses these to efficiently annotate the dataset. While the tool is still in early stages, we present the concept and discuss future directions.},
year = {2022},
title = {Interactive design of interpretable features for marine soundscape data annotation},
booktitle = {Workshop on Human-centered Design of Symbiotic Hybrid Intelligence. Workshop on Human-centered Design of Symbiotic Hybrid Intelligence (HCSHI-2022), located at HHAI, June 14, Amsterdam, Netherlands},
publisher = {HHAI},
author = {Thiago S. Gouvêa and Ilira Troshani and Marc Herrlich and Daniel Sonntag},
keywords = {interactive machine learning, marine research, passive acoustic monitoring, sound classification, data programming},
url = {https://ii.tudelft.nl/humancenteredsymbioticHI/node/9 https://www.dfki.de/fileadmin/user_upload/import/12429_Gouvea_et_al_2022_Interactive_design_of_interpretable_features_for_marine_soundscape_data.pdf}
}
A. Anagnostopoulou, M. Hartmann, and D. Sonntag, Putting Humans in the Image Captioning Loop.
@misc{pub12516, abstract = {Image Captioning (IC) models can highly benefit from human feedback in the training process, especially in cases where data is limited. We present work-in-progress on adapting an IC system to integrate human feedback, with the goal to make it easily adaptable to user-specific data. Our approach builds on a base IC model pre-trained on the MS COCO dataset, which generates captions for unseen images. The user will then be able to offer feedback on the image and the generated/predicted caption, which will be augmented to create additional training instances for the adaptation of the model. The additional instances are integrated into the model using step-wise updates, and a sparse memory replay component is used to avoid catastrophic forgetting. We hope that this approach, while leading to improved results, will also result in customizable IC models.},
month = {7},
year = {2022},
title = {Putting Humans in the Image Captioning Loop},
howpublished = {Bridging Human-Computer Interaction and Natural Language Processing (NAACL 2022)},
author = {Aliki Anagnostopoulou and Mareike Hartmann and Daniel Sonntag},
url = {https://drive.google.com/file/d/1WT1Emfc76Myv_PujMXaWI4ucqF9eegqC/view https://www.dfki.de/fileadmin/user_upload/import/12516_5.pdf}
}
M. Hartmann and D. Sonntag, "A survey on improving NLP models with human explanations" in Proc. Proceedings of the First Workshop on Learning with Natural Language Supervision. Workshop on Learning with Natural Language Supervision, located at ACL 2022, May 26, Dublin, Ireland, 2022.
@inproceedings{pub12519, abstract = {Training a model with access to human explanations can improve data efficiency and model performance on in- and out-of-domain data. Adding to these empirical findings, similarity with the process of human learning makes learning from explanations a promising way to establish a fruitful human-machine interaction. Several methods have been proposed for improving natural language processing (NLP) models with human explanations, that rely on different explanation types and mechanisms for integrating these explanations into the learning process. These methods are rarely compared with each other, making it hard for practitioners to choose the best combination of explanation type and integration mechanism for a specific use-case. In this paper, we give an overview of different methods for learning from human explanations, and discuss different factors that can inform the decision of which method to choose for a specific use-case.},
year = {2022},
title = {A survey on improving NLP models with human explanations},
booktitle = {Proceedings of the First Workshop on Learning with Natural Language Supervision. Workshop on Learning with Natural Language Supervision, located at ACL 2022, May 26, Dublin, Ireland},
publisher = {Association for Computational Linguistics},
author = {Mareike Hartmann and Daniel Sonntag},
keywords = {Human explanations; NLP},
url = {https://aclanthology.org/2022.lnls-1.5.pdf https://www.dfki.de/fileadmin/user_upload/import/12519_A_survey_on_improving_NLP_models_with_human_explanations.pdf}
}
L. Graf, M. Altmeyer, K. Emmerich, M. Herrlich, A. Krekhov, and K. Spiel, "Development and Validation of a German Version of the Player Experience Inventory (PXI)" in Proc. Proceedings of the Mensch und Computer Conference. Mensch und Computer (MuC-2022), September 4-7, Darmstadt, Germany, 2022.
doi: 10.1145/3543758.3543763
@inproceedings{pub12535, abstract = {The Player Experience Inventory (PXI), initially developed by Abeele et al. (2020), measures player experiences among English-speaking players. However, empirically validated translations of the PXI are sparse, limiting the use of the scale among non-English speaking players. In this paper, we address this issue by providing a translated version of the scale in German, the most widely spoken first language in the European Union. After translating the original items, we conducted a confirmatory factor analysis (N=506) to validate the German version of the PXI. Our results confirmed a 10-factor model - which the original authors of the instrument suggested - and show that the German PXI has valid psychometric properties. While model fit, internal consistency and convergent validity were acceptable, there was room for improvement regarding discriminant validity. Based on our results, we advocate for the German PXI as a valid and reliable instrument for assessing player experiences in German-speaking samples.},
year = {2022},
title = {Development and Validation of a German Version of the Player Experience Inventory (PXI)},
booktitle = {Proceedings of the Mensch und Computer Conference. Mensch und Computer (MuC-2022), September 4-7, Darmstadt, Germany},
publisher = {ACM},
doi = {https://doi.org/10.1145/3543758.3543763},
author = {Linda Graf and Maximilian Altmeyer and Katharina Emmerich and Marc Herrlich and Andrey Krekhov and Katta Spiel},
keywords = {games user research;PXI;validation;games},
url = {https://www.dfki.de/fileadmin/user_upload/import/12535_MuC22__German_PXI_Version.pdf https://dl.acm.org/doi/10.1145/3543758.3543763}
}
D. Queck, I. Albert, N. Burkard, P. Zimmer, G. Volkmar, B. Dänekas, R. Malaka, and M. Herrlich, "SpiderClip: Towards an Open Source System for Wearable Device Simulation in Virtual Reality" in Proc. CHI EA '22: Extended Abstracts of the 2022 CHI Conference on Human Factors in Computing Systems. International Conference Extended Abstracts on Human Factors in Computing Systems (CHI EA-2022), 2022.
doi: 10.1145/3491101.3519758
@inproceedings{pub12550, abstract = {Smartwatches and fitness trackers integrate different sensors from inertial measurement units to heart rate sensors in a very compact and affordable form factor. This makes them interesting and relevant research tools. One potential application domain is virtual reality, e.g., for health related applications such as exergames or simulation approaches. However, commercial devices complicate and limit the collection of raw and real-time data, suffer from privacy issues and are not tailored to using them with VR tracking systems. We address these issues with an open source design to facilitate the construction of VR-enabled wearables for conducting scientific experiments. Our work is motivated by research in mixed realities in pervasive computing environments. We introduce our system and present a proof-of-concept study with 17 participants. Our results show that the wearable reliably measures high-quality data comparable to commercially available fitness trackers and that it does not impede movements or interfere with VR tracking.},
month = {4},
year = {2022},
title = {SpiderClip: Towards an Open Source System for Wearable Device Simulation in Virtual Reality},
booktitle = {CHI EA '22: Extended Abstracts of the 2022 CHI Conference on Human Factors in Computing Systems. International Conference Extended Abstracts on Human Factors in Computing Systems (CHI EA-2022)},
publisher = {Association for Computing Machinery},
doi = {https://doi.org/10.1145/3491101.3519758},
author = {Dirk Queck and Iannis Albert and Nicole Burkard and Philipp Zimmer and Georg Volkmar and Bastian Dänekas and Rainer Malaka and Marc Herrlich},
url = {https://dl.acm.org/doi/abs/10.1145/3491101.3519758#sec-supp https://www.dfki.de/fileadmin/user_upload/import/12550_SpiderClip__Towards_an_Open_Source_System_for_Wearable.pdf}
}
M. Rekrut, A. M. Selim, and A. Krüger, "Improving Silent Speech BCI Training Procedures through Transfer from Overt to Silent Speech" in Proc. Proceedings of the IEEE International Conference on Systems, Man, and Cybernetics. IEEE International Conference on Systems, Man, and Cybernetics (SMC-2022), 2022.
@inproceedings{pub12619, abstract = {Silent speech Brain-Computer Interfaces (BCIs) try to decode imagined speech from brain activity. Those BCIs require a tremendous amount of training data, usually collected during mentally and physically exhausting sessions in which participants silently repeat words presented on a screen for several hours. Within this work we present an approach to overcome those exhausting sessions by training a silent speech classifier on data recorded while speaking certain words and transferring this classifier to EEG data recorded during silent repetition of the same words. This approach does not only allow for a less mentally and physically exhausting training procedure but also for a more productive one, as the overt speech output can be used for interaction while the classifier for silent speech is trained simultaneously. We evaluated our approach in a study in which 15 participants navigated a virtual robot on a screen in a game-like scenario through a maze, once with 5 overtly spoken and once with the same 5 silently spoken command words. In an offline analysis, we trained a classifier on overt speech data and let it predict silent speech data. Our classification results not only show successful transfer (61.78%), significantly above chance level, but also results comparable to a standard silent speech classifier (71.48%) trained and tested on the same data. These results illustrate the potential of the method to replace the currently tedious training procedures for silent speech BCIs with a more comfortable, engaging, and productive approach based on a transfer from overt to silent speech.},
year = {2022},
title = {Improving Silent Speech BCI Training Procedures through Transfer from Overt to Silent Speech},
booktitle = {Proceedings of the IEEE International Conference on Systems, Man, and Cybernetics. IEEE International Conference on Systems, Man, and Cybernetics (SMC-2022)},
publisher = {IEEE},
author = {Maurice Rekrut and Abdulrahman Mohamed Selim and Antonio Krüger}
}
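The transfer idea above can be made concrete with a minimal sketch (ours, not the authors' code): a classifier is fitted on features of overt-speech EEG trials and evaluated directly on silent-speech trials of the same five command words. All arrays below are random placeholders for real EEG features.

    # Minimal sketch of overt-to-silent transfer; placeholder data, not the study's.
    import numpy as np
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
    from sklearn.metrics import accuracy_score

    rng = np.random.default_rng(0)
    X_overt = rng.normal(size=(200, 64))    # hypothetical overt-speech EEG features
    y_overt = rng.integers(0, 5, size=200)  # five command words
    X_silent = rng.normal(size=(200, 64))   # hypothetical silent-speech features
    y_silent = rng.integers(0, 5, size=200)

    clf = LinearDiscriminantAnalysis().fit(X_overt, y_overt)  # train on overt speech
    print("transfer accuracy:", accuracy_score(y_silent, clf.predict(X_silent)))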
K. Kuznetsov, M. Barz, and D. Sonntag, "SpellInk: Interactive correction of spelling mistakes in handwritten text" in Proc. Proceedings of the First International Conference on Hybrid Human-Machine Intelligence. International Conference on Hybrid Human-Artificial Intelligence (HHAI-2022), the 1st International Conference on Hybrid Human-Artificial Intelligence, June 13-17, Amsterdam, Netherlands, De Boelelaan 1105, 1081 HV Amsterdam, Netherlands, 2022.
doi: 10.3233/FAIA220216
@inproceedings{pub12621, series = {Frontiers in Artificial Intelligence and Applications},
abstract = {Despite the current dominance of typed text, writing by hand remains the most natural means of written communication and information keeping. Still, digital pen input provides a limited user experience and lacks flexibility, as most manipulations are performed on a digitized version of the text. In this paper, we present our prototype that enables spellchecking for handwritten text: it allows users to interactively correct misspellings directly in a handwritten script. We plan to study the usability of the proposed user interface and its acceptance by users. Also, we aim to investigate how user feedback can be used to incrementally improve the underlying recognition models.},
year = {2022},
title = {SpellInk: Interactive correction of spelling mistakes in handwritten text},
booktitle = {Proceedings of the First International Conference on Hybrid Human-Machine Intelligence. International Conference on Hybrid Human-Artificial Intelligence (HHAI-2022), the 1st International Conference on Hybrid Human-Artificial Intelligence, June 13-17, Amsterdam, Netherlands},
volume = {354},
pages = {278-280},
isbn = {978-1-64368-308-9},
address = {De Boelelaan 1105, 1081 HV Amsterdam, Netherlands},
publisher = {IOS Press},
doi = {10.3233/FAIA220216},
author = {Konstantin Kuznetsov and Michael Barz and Daniel Sonntag},
keywords = {digital pen, handwriting recognition, handwriting generation},
organization = {Vrije Universiteit Amsterdam},
url = {https://www.hhai-conference.org/demos/pd_paper_5349/ https://www.hhai-conference.org/wp-content/uploads/2022/06/hhai2022-pd_paper_5349.pdf https://www.dfki.de/fileadmin/user_upload/import/12621_hhai22_demo_spellink.pdf}
}
F. Céard-Falkenberg, K. Kuznetsov, A. Prange, M. Barz, and D. Sonntag, "pEncode: A Tool for Visualizing Pen Signal Encodings in Real-time" in Proc. Proceedings of the First International Conference on Hybrid Human-Machine Intelligence. International Conference on Hybrid Human-Artificial Intelligence (HHAI-2022), 1st International Conference on Hybrid Human-Artificial Intelligence, June 13-17, Amsterdam, Netherlands, De Boelelaan 1105, 1081 HV Amsterdam, Netherlands, 2022.
doi: 10.3233/FAIA220217
@inproceedings{pub12622, series = {Frontiers in Artificial Intelligence and Applications},
abstract = {Many features have been proposed for encoding the input signal from digital pens and touch-based interaction. They are widely used for analyzing and classifying handwritten texts, sketches, or gestures. Although they are well defined mathematically, many features are non-trivial and therefore difficult for a human to understand. In this paper, we present an application that visualizes a subset of 114 digital pen features in real-time while drawing. It provides an easy-to-use interface that allows application developers and machine learning practitioners to learn how digital pen features encode their inputs, helps in the feature selection process, and enables rapid prototyping of sketch and gesture classifiers.},
year = {2022},
title = {pEncode: A Tool for Visualizing Pen Signal Encodings in Real-time},
booktitle = {Proceedings of the First International Conference on Hybrid Human-Machine Intelligence. International Conference on Hybrid Human-Artificial Intelligence (HHAI-2022), 1st International Conference on Hybrid Human-Artificial Intelligence, June 13-17, Amsterdam, Netherlands},
volume = {354},
pages = {281-284},
isbn = {978-1-64368-308-9},
address = {De Boelelaan 1105, 1081 HV Amsterdam, Netherlands},
publisher = {IOS Press},
doi = {10.3233/FAIA220217},
author = {Felix Céard-Falkenberg and Konstantin Kuznetsov and Alexander Prange and Michael Barz and Daniel Sonntag},
keywords = {digital pen, gesture recognition, digital pen features, machine learning},
organization = {Vrije Universiteit Amsterdam},
url = {https://www.hhai-conference.org/demos/pd_paper_6439/ https://www.youtube.com/watch?v=t80aa2E5jKo https://www.dfki.de/fileadmin/user_upload/import/12622_hhai22_demo_pencode.pdf}
}
O. S. Bhatti, M. Barz, and D. Sonntag, "Leveraging Implicit Gaze-Based User Feedback for Interactive Machine Learning" in Proc. KI 2022: Advances in Artificial Intelligence. German Conference on Artificial Intelligence (KI), Cham, 2022.
@inproceedings{pub12633, abstract = {Interactive Machine Learning (IML) systems incorporate humans into the learning process to enable iterative and continuous model improvements. The interactive process can be designed to leverage the expertise of domain experts with no background in machine learning, for instance, through repeated user feedback requests. However, excessive requests can be perceived as annoying and cumbersome and could reduce user trust. Hence, it is mandatory to establish an efficient dialog between a user and a machine learning system. We aim to detect when a domain expert disagrees with the output of a machine learning system by observing their eye movements and facial expressions. In this paper, we describe our approach for modelling user disagreement and discuss how such a model could be used for triggering user feedback requests in the context of interactive machine learning.},
year = {2022},
title = {Leveraging Implicit Gaze-Based User Feedback for Interactive Machine Learning},
booktitle = {KI 2022: Advances in Artificial Intelligence. German Conference on Artificial Intelligence (KI), Cham},
editor = {Ralph Bergmann and Lukas Malburg and Stephanie C. Rodermund and Ingo J. Timm},
pages = {9-16},
isbn = {978-3-031-15791-2},
publisher = {Springer International Publishing},
author = {Omair Shahzad Bhatti and Michael Barz and Daniel Sonntag},
url = {https://doi.org/10.1007/978-3-031-15791-2 https://www.dfki.de/fileadmin/user_upload/import/12633_Leveraging_implicit_gaze_based_user_feedback_for_interactive_machine_learning__KI_22__Accepted__(6).pdf}
}
S. Liang, K. Kades, M. A. Fink, P. M. Full, T. F. Weber, J. Kleesiek, M. Strube, and K. Maier-Hein, "Fine-tuning BERT Models for Summarizing German Radiology Findings" in Proc. Proceedings of the 4th Clinical Natural Language Processing Workshop. Clinical Natural Language Processing Workshop (ClinicalNLP-2022), located at NAACL 2022, July 14, Seattle, WA, USA, 2022.
@inproceedings{pub12809, abstract = {Writing the conclusion section of radiology reports is essential for communicating the radiology findings and their assessment to physicians in a condensed form. In this work, we employ a transformer-based Seq2Seq model for generating the conclusion section of German radiology reports. The model is initialized with the pre-trained parameters of a German BERT model and fine-tuned on our domain data in the downstream task. We propose two strategies to improve the factual correctness of the model. In the first method, in addition to the abstractive learning objective, we introduce an extraction learning objective to train the decoder to both generate one summary sequence and extract the key findings from the source input. The second approach is to integrate the pointer mechanism into the transformer-based Seq2Seq model. The pointer network helps the Seq2Seq model to choose between generating tokens from the vocabulary or copying parts from the source input during generation. The results of the automatic and human evaluations show that the enhanced Seq2Seq model is capable of generating human-like radiology conclusions and that the improved models effectively reduce the factual errors in the generations despite the small amount of training data.},
month = {7},
year = {2022},
title = {Fine-tuning BERT Models for Summarizing German Radiology Findings},
booktitle = {Proceedings of the 4th Clinical Natural Language Processing Workshop. Clinical Natural Language Processing Workshop (ClinicalNLP-2022), located at NAACL 2022, July 14, Seattle, WA, USA},
editor = {Tristan Naumann and Steven Bethard and Kirk Roberts and Anna Rumshisky},
publisher = {Association for Computational Linguistics},
author = {Siting Liang and Klaus Kades and Matthias A. Fink and Peter M. Full and Tim F. Weber and Jens Kleesiek and Michael Strube and Klaus Maier-Hein},
url = {https://aclanthology.org/2022.clinicalnlp-1.4.pdf https://www.dfki.de/fileadmin/user_upload/import/12809_2022.clinicalnlp-1.4.pdf}
}
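The pointer mechanism mentioned above can be sketched as follows (an illustration of the general technique, not the authors' implementation): the decoder mixes a vocabulary distribution with copy probabilities over source tokens, weighted by a generation probability p_gen. All inputs below are toy values and all names are hypothetical.

    # Pointer-style mixture of generating and copying; toy values only.
    import numpy as np

    def final_distribution(vocab_dist, attention, src_ids, p_gen, vocab_size):
        """vocab_dist: (V,); attention: (S,); src_ids: (S,) source token ids."""
        copy = np.zeros(vocab_size)
        np.add.at(copy, src_ids, (1.0 - p_gen) * attention)  # scatter copy mass
        return p_gen * vocab_dist + copy

    vocab_dist = np.full(10, 0.1)           # uniform toy vocabulary distribution
    attention = np.array([0.7, 0.2, 0.1])   # attention over three source tokens
    print(final_distribution(vocab_dist, attention, np.array([4, 2, 7]), 0.6, 10))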
S. Liang, M. Hartmann, and D. Sonntag, "Cross-lingual German Biomedical Information Extraction: from Zero-shot to Human-in-the-Loop" in Proc. Second Workshop on Bridging Human-Computer Interaction and Natural Language Processing - Proceedings of the Workshop. Workshop on Bridging Human-Computer Interaction and Natural Language Processing (HCI+NLP-2022), located at NAACL 2022, July 15, Seattle, Washington, USA, 2022.
@inproceedings{pub12839, abstract = {This paper presents our project proposal for extracting biomedical information from German clinical narratives with limited amounts of annotations. We first describe the applied strategies in transfer learning and active learning for solving our problem. After that, we discuss the design of the user interface for both supplying model inspection and obtaining user annotations in the interactive environment.},
month = {7},
year = {2022},
title = {Cross-lingual German Biomedical Information Extraction: from Zero-shot to Human-in-the-Loop},
booktitle = {Second Workshop on Bridging Human-Computer Interaction and Natural Language Processing - Proceedings of the Workshop. Workshop on Bridging Human-Computer Interaction and Natural Language Processing (HCI+NLP-2022), located at NAACL 2022, July 15, Seattle, Washington, USA},
isbn = {978-1-955917-90-2},
publisher = {Association for Computational Linguistics},
author = {Siting Liang and Mareike Hartmann and Daniel Sonntag},
organization = {NAACL},
url = {https://www.dfki.de/fileadmin/user_upload/import/12839_2022_HCI+NLP.3.1.pdf https://arxiv.org/abs/2301.09908}
}
G. Volkmar, D. Alexandrovsky, A. E. Eilks, D. Queck, M. Herrlich, and R. Malaka, "Effects of PCG on Creativity in Playful City-Building Environments in VR" Proceedings of the ACM on Human-Computer Interaction (PACMHCI), vol. 6.
2022.
@article{pub12840, abstract = {The use of procedural content generation (PCG) in the context of video games has increased over the years as it provides an economical way to generate game content whilst enhancing their variety and replayability. For city-building games, this approach is often utilized to predefine map layouts, terrains, or cityscapes for the player. One core aspect of facilitating enjoyment in these games comes from creative expressivity. PCG, in this context, may support creativity by lowering the technical complexity for content creation, or it may hinder creativity by taking away control and freedom from the user. To examine these potential effects, this paper investigates if PCG has an impact on players' creativity in the context of VR city-building games. We present a VR prototype that provides varying degrees of procedural content: No PCG, terrain generation, city generation, and full (city + terrain) generation. In a remote user study, these conditions were compared regarding their capability to support creativity. Statistical tests for equivalence revealed that the presence of PCG did not affect creativity in any way. Our work suggests that PCG can be a useful integration into city-building games without notably decreasing players' ability to express themselves creatively.},
year = {2022},
title = {Effects of PCG on Creativity in Playful City-Building Environments in VR},
journal = {Proceedings of the ACM on Human-Computer Interaction (PACMHCI)},
volume = {6},
pages = {1-20},
publisher = {Association for Computing Machinery},
author = {Georg Volkmar and Dmitry Alexandrovsky and Asmus Eike Eilks and Dirk Queck and Marc Herrlich and Rainer Malaka}
}
I. Albert, N. Burkard, D. Queck, and M. Herrlich, "The Effect of Auditory-Motor Synchronization in Exergames on the Example of the VR Rhythm Game BeatSaber" Proceedings of the ACM on Human-Computer Interaction (PACMHCI), vol. 6.
2022.
@article{pub12841, abstract = {Physical inactivity and an increasingly sedentary lifestyle constitute a significant public health concern. Exergames try to tackle this problem by combining exercising with motivational gameplay. Another approach in sports science is the use of auditory-motor synchronization, the entrainment of movements to the rhythm of music. There are already commercially successful games making use of the combination of both, such as the popular VR rhythm game BeatSaber. However, unlike traditional exercise settings often relying on periodic movements that can be easily entrained to a rhythmic pulse, exergames typically offer an additional cognitive challenge through their gameplay and might be based more on reaction or memorization. That poses the question as to what extent the effects of auditory-motor synchronization can be transferred to exergames, and if the synchronization of music and gameplay facilitates the playing experience. We conducted a user study (N = 54) to investigate the effects of different degrees of synchronization between music and gameplay using the VR rhythm game BeatSaber. Results show significant effects on performance, perceived workload, and player experience between the synchronized and non-synchronized conditions, but the results seem to be strongly mediated by the ability of the participants to consciously perceive the synchronization differences.},
year = {2022},
title = {The Effect of Auditory-Motor Synchronization in Exergames on the Example of the VR Rhythm Game BeatSaber},
journal = {Proceedings of the ACM on Human-Computer Interaction (PACMHCI)},
volume = {6},
pages = {1-26},
publisher = {Association for Computing Machinery},
author = {Iannis Albert and Nicole Burkard and Dirk Queck and Marc Herrlich}
}
S. Szeier, B. Baffy, G. Baranyi, J. Skaf, L. Kopácsi, D. Sonntag, G. Sörös, and A. Lőrincz, "3D Semantic Label Transfer and Matching in Human-Robot Collaboration" Learning to Generate 3D Shapes and Scenes, ECCV 2022 Workshop.
@misc{pub12900, abstract = {Semantic 3D maps are highly useful for human-robot collaboration and joint task planning. We build upon an existing real-time 3D semantic reconstruction pipeline and extend it with semantic matching across human and robot viewpoints, which is required if class labels differ or are missing due to different perspectives during collaborative reconstruction. We use deep recognition networks, which usually perform well from higher (human) viewpoints but are inferior from ground robot viewpoints. Therefore, we propose several approaches for acquiring semantic labels for unusual perspectives. We group the pixels from the lower viewpoint, project voxel class labels of the upper perspective to the lower perspective and apply majority voting to obtain labels for the robot. The quality of the reconstruction is evaluated in the Habitat simulator and in a real environment using a robot car equipped with an RGBD camera. The proposed approach can provide high-quality semantic segmentation from the robot perspective with accuracy similar to the human perspective. Furthermore, as computations are close to real time, the approach enables interactive applications.},
month = {10},
year = {2022},
title = {3D Semantic Label Transfer and Matching in Human-Robot Collaboration},
publisher = {Learning to Generate 3D Shapes and Scenes, ECCV 2022 Workshop},
author = {Szilvia Szeier and Benjámin Baffy and Gábor Baranyi and Joul Skaf and László Kopácsi and Daniel Sonntag and Gábor Sörös and András Lőrincz},
url = {https://learn3dg.github.io/ https://www.dfki.de/fileadmin/user_upload/import/12900_0003_paper.pdf}
}
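The label-transfer step described above, projecting voxel class labels from the upper viewpoint into the lower one and voting, reduces to a simple rule per pixel group. The sketch below is a toy illustration with hypothetical labels, not the paper's pipeline.

    # Majority vote over projected class labels for one pixel group (toy example).
    from collections import Counter

    def majority_label(projected_labels):
        """projected_labels: labels of upper-view voxels hitting one pixel group."""
        if not projected_labels:
            return None
        return Counter(projected_labels).most_common(1)[0][0]

    print(majority_label(["chair", "chair", "table"]))  # -> "chair"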
M. A. Rezaei, A. Fathollahi, S. Rezaei, J. Hu, M. Gheisarnejad, A. R. Teimouri, R. Rituraj, A. Mosavi, and M. Khooban, "Adaptation of A Real-Time Deep Learning Approach with An Analog Fault Detection Technique for Reliability Forecasting of Capacitor Banks Used in Mobile Vehicles" IEEE Access (IEEE), vol. 10.
2022.
doi: 10.1109/ACCESS.2022.3228916
@article{pub12980, abstract = {The DC-link capacitor is an essential power electronics element that sources or sinks the respective currents. The reliability of DC-link capacitor banks (CBs) encounters many challenges due to their usage in electric vehicles. Heavy shocks may damage the internal capacitors without shutting down the CB. The fundamental development obstacles of CBs are: the lack of consideration of capacitor degradation in reliability assessment, the impact of unforeseen sudden internal capacitor faults on forecasting CB lifetime, and the consequence of faults for CB degradation. Sudden faults change the CB capacitance, which in turn changes the reliability. To estimate the reliability more accurately, the type of fault needs to be detected in order to predict the correct post-fault capacitance. To address these practical problems, a new CB model and a reliability assessment formula covering all fault types are first presented; then a new analog fault-detection method is introduced, and a combination of online-learning long short-term memory (LSTM) and the fault-detection method is subsequently applied, which adapts the LSTM to sudden internal CB faults to correctly predict CB degradation. To confirm correct LSTM operation, the degradation of four capacitors is recorded over 2000 hours, and the offline fault-free degradation values predicted by the LSTM are compared with the actual data. The experimental findings validate the applicability of the proposed method. The code and data are provided.},
month = {12},
year = {2022},
title = {Adaptation of A Real-Time Deep Learning Approach with An Analog Fault Detection Technique for Reliability Forecasting of Capacitor Banks Used in Mobile Vehicles},
journal = {IEEE Access (IEEE)},
volume = {10},
pages = {132271-132287},
publisher = {IEEE},
doi = {10.1109/ACCESS.2022.3228916},
author = {Mohammad Amin Rezaei and Arman Fathollahi and Sajad Rezaei and Jiefeng Hu and Meysam Gheisarnejad and Ali Reza Teimouri and Rituraj Rituraj and Amirhosein Mosavi and Mohammad-Hassan Khooban},
url = {https://www.researchgate.net/publication/366230330_Adaptation_of_A_Real-Time_Deep_Learning_Approach_with_An_Analog_Fault_Detection_Technique_for_Reliability_Forecasting_of_Capacitor_Banks_Used_in_Mobile_Vehicles https://www.dfki.de/fileadmin/user_upload/import/12980_Adaptation_of_a_Real-Time_Deep_Learning.pdf}
}
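The LSTM-based degradation forecasting above can be illustrated with a minimal next-step predictor on a univariate capacitance series (a generic sketch under toy assumptions, not the paper's model or data).

    # Next-step degradation forecasting with an LSTM; toy series, illustrative only.
    import torch
    import torch.nn as nn

    series = torch.linspace(1.0, 0.9, steps=200)   # toy degradation curve
    window = 20
    X = torch.stack([series[i:i + window] for i in range(len(series) - window)])
    y = series[window:]

    lstm = nn.LSTM(input_size=1, hidden_size=16, batch_first=True)
    head = nn.Linear(16, 1)
    opt = torch.optim.Adam(list(lstm.parameters()) + list(head.parameters()), lr=1e-2)

    for _ in range(100):
        out, _ = lstm(X.unsqueeze(-1))          # (batch, window, 1) -> hidden states
        pred = head(out[:, -1]).squeeze(-1)     # last hidden state -> next value
        loss = nn.functional.mse_loss(pred, y)
        opt.zero_grad(); loss.backward(); opt.step()
    print("final training MSE:", float(loss))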
J. K. Sandhu, U. K. Lilhore, P. M, N. Kaur, S. S. Band, M. Hamdi, C. Iwendi, S. Simaiya, M. M. Kamruzzaman, and A. Mosavi, "Predicting the Risk of Heart Failure Based on Clinical Data" Human-centric Computing and Information Sciences (HCIS), vol. 12.
2022.
doi: 10.22967/HCIS.2022.12.057
@article{pub12981, abstract = {Cardiovascular disease (CVD) is the disorder that directly impacts the heart and the blood vessels inside the body. According to the World Health Organization reports, CVDs are the leading cause of mortality worldwide, claiming the lives of nearly 23.6 million people annually. The categorization of diseases in CVD includes coronary heart disease, strokes and transient ischemic attacks (TIA), peripheral arterial disease, and aortic disease. Most CVD fatalities are caused by strokes and heart attacks, with an estimated one-third of these deaths currently happening before 60. The standard medical organization "New York Heart Association" (NYHA) categorizes the various stages of heart failure as Class I (with no symptoms), Class II (mild symptoms), Class III (comfortable only when in resting position), Class IV (severe condition or patient is bed-bound), and Class V (unable to determine the class). Machine learning-based methods play an essential role in clinical data analysis. This research presents the importance of various essential attributes related to heart disease based on a hybrid machine learning model. The proposed hybrid model SVM-GA is based on a support vector machine and the genetic algorithm. This research analyzed an online dataset available at the UCI Machine Learning Repository with the medical data of 299 patients who suffered from heart failure and are classified as Class III or IV as per the NYHA standard. The dataset was collected during the patients' follow-up and checkup period and involves thirteen clinical characteristics. The proposed machine learning models were used to calculate feature importance in this research. The proposed model and existing well-known machine learning based-models, i.e., Bayesian generalized linear model, ANN, Bagged CART, Bag Earth, and SVM, are implemented using Python, and various performance measures, i.e., accuracy, processing time, precision, recall, and F-measure, are calculated. Experimental analysis shows that the proposed SVM-GA model outperforms existing methods in terms of accuracy, processing time, precision, recall, and F-measure.},
month = {12},
year = {2022},
title = {Predicting the Risk of Heart Failure Based on Clinical Data},
journal = {Human-centric Computing and Information Sciences (HCIS)},
volume = {12},
pages = {1322-1355},
publisher = {Korea Information Processing Society (KIPS-CSWRG)},
doi = {10.22967/HCIS.2022.12.057},
author = {Jasminder Kaur Sandhu and Umesh Kumar Lilhore and Poongodi M and Navpreet Kaur and Shahab S. Band and Mounir Hamdi and Celestine Iwendi and Sarita Simaiya and M.M. Kamruzzaman and Amirhosein Mosavi},
keywords = {Heart Failure, Machine Learning, Computing, Healthcare, Biomedical Diagnosis},
url = {https://www.researchgate.net/publication/366297985_Predicting_the_Risk_of_Heart_Failure_Based_on_Clinical_Data/link/639b3250e42faa7e75c57942/download?_tp=eyJjb250ZXh0Ijp7ImZpcnN0UGFnZSI6InB1YmxpY2F0aW9uIiwicGFnZSI6InB1YmxpY2F0aW9uIn19 https://www.dfki.de/fileadmin/user_upload/import/12981_Predicting_the_Risk_of_Heart_Failure_Based_on_Clinical_Data.pdf}
}
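The hybrid SVM-GA idea above, a genetic algorithm searching SVM hyperparameters, can be sketched as follows (toy data and a deliberately small GA loop, not the paper's implementation).

    # GA-style search over SVM hyperparameters (log10 C, log10 gamma); toy setup.
    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.model_selection import cross_val_score
    from sklearn.svm import SVC

    X, y = make_classification(n_samples=200, n_features=13, random_state=0)
    rng = np.random.default_rng(0)
    pop = rng.uniform([-2.0, -4.0], [2.0, 0.0], size=(8, 2))

    def fitness(ind):
        C, gamma = 10.0 ** ind
        return cross_val_score(SVC(C=C, gamma=gamma), X, y, cv=3).mean()

    for _ in range(5):                                  # a few GA generations
        scores = np.array([fitness(ind) for ind in pop])
        parents = pop[np.argsort(scores)[-4:]]          # selection: keep best half
        children = parents + rng.normal(0.0, 0.3, parents.shape)  # mutation
        pop = np.vstack([parents, children])
    best = pop[np.argmax([fitness(ind) for ind in pop])]
    print("best C, gamma:", 10.0 ** best)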
M. Manshadi, M. Mousavi, M. Soltani, A. Mosavi, and L. Kovacs, "Deep Learning for Modeling an Offshore Hybrid Wind–Wave Energy System" Energies, vol. 15, iss. 24.
2022.
doi: 10.3390/en15249484
@article{pub12990, abstract = {The combination of an offshore wind turbine and a wave energy converter on an integrated platform is an economical solution for the electrical power demand in coastal countries. Due to the high installation cost, a prediction should be used to investigate whether a location is suitable for such sites. For this purpose, this research assesses the feasibility of installing a combined hybrid site at a desired coastal location by predicting the net produced power from the environmental parameters. To combine the two systems, an optimized array of ten turbines and ten wave energy converters is used. The mathematical equations of the net force on the two systems and the produced power of the wind turbines are proposed. The maximum forces on the turbines and on the wave energy converters are 4 kN and 6 kN, respectively. Furthermore, a comparison is conducted to find the optimal system for the desired environmental conditions. A number of machine learning and deep learning methods are used to predict key parameters after collecting the dataset. Moreover, a comparative analysis is conducted to find a suitable model. The models’ performance has been studied through the confusion matrix and the receiver operating characteristic (ROC) curve of the hybrid site. The deep learning model outperformed the other models, with an approximate accuracy of 0.96.},
number = {24},
month = {12},
year = {2022},
title = {Deep Learning for Modeling an Offshore Hybrid Wind–Wave Energy System},
journal = {Energies},
volume = {15},
pages = {9484-9494},
publisher = {MDPI},
doi = {10.3390/en15249484},
author = {Mahsa Manshadi and Milad Mousavi and M. Soltani and Amirhosein Mosavi and Levente Kovacs},
keywords = {renewable energy; artificial intelligence; machine learning; comparative analysis; wind turbine; energy; deep learning; big data; wave energy; wave power; offshore},
url = {https://www.mdpi.com/1996-1073/15/24/9484}
}
S. Yan, M. Tian, K. A. Alattas, A. Mohamadzadeh, M. H. Sabzalian, and A. Mosavi, "An Experimental Machine Learning Approach for Mid-Term Energy Demand Forecasting" IEEE Access (IEEE), vol. 10.
2022.
doi: 10.1109/ACCESS.2022.3221454
@article{pub12991, abstract = {In this study, a neural network-based approach is designed for mid-term load forecasting (MTLF). The structure and hyperparameters are tuned to obtain the best forecasting accuracy one year ahead. The suggested approach is applied in practice to a region in Iran using real-world data sets spanning 10 years. Influential factors such as economic, weather, and social factors are investigated, and their impact on accuracy is numerically analyzed. Bad data are detected by the suggested method. In addition to the load peak, the 24-hour load pattern is also predicted, which supports better mid-term planning. The simulations show that the suggested approach is practical and that the accuracy is more than 95%, even when there are drastic weather changes.},
month = {11},
year = {2022},
title = {An Experimental Machine Learning Approach for Mid-Term Energy Demand Forecasting},
journal = {IEEE Access (IEEE)},
volume = {10},
pages = {118926-118940},
publisher = {IEEE},
doi = {10.1109/ACCESS.2022.3221454},
author = {Shu-Rong Yan and Manwen Tian and Khalid A. Alattas and Ardashir Mohamadzadeh and Mohammad Hosein Sabzalian and Amirhosein Mosavi},
url = {https://ieeexplore.ieee.org/document/9945969 https://www.dfki.de/fileadmin/user_upload/import/12991_An_Experimental_Machine_Learning_Approach_for_Mid-Term_Energy_Demand_Forecasting.pdf}
}
M. Hartmann, H. Du, N. Feldhus, I. Kruijff-Korbayová, and D. Sonntag, "XAINES: Explaining AI with Narratives" KI - Künstliche Intelligenz, German Journal on Artificial Intelligence - Organ des Fachbereiches "Künstliche Intelligenz" der Gesellschaft für Informatik e.V. (KI), vol. 36.
2022.
doi: 10.1007/s13218-022-00780-8
@article{pub13116, abstract = {Artificial Intelligence (AI) systems are increasingly pervasive: Internet of Things, in-car intelligent devices, robots, and virtual assistants, and their large-scale adoption makes it necessary to explain their behaviour, for example to their users who are impacted by their decisions, or to their developers who need to ensure their functionality. This requires, on the one hand, to obtain an accurate representation of the chain of events that caused the system to behave in a certain way (e.g., to make a specific decision). On the other hand, this causal chain needs to be communicated to the users depending on their needs and expectations. In this phase of explanation delivery, allowing interaction between user and model has the potential to improve both model quality and user experience. The XAINES project investigates the explanation of AI systems through narratives targeted to the needs of a specific audience, focusing on two important aspects that are crucial for enabling successful explanation: generating and selecting appropriate explanation content, i.e. the information to be contained in the explanation, and delivering this information to the user in an appropriate way. In this article, we present the project’s roadmap towards enabling the explanation of AI with narratives.},
month = {12},
year = {2022},
title = {XAINES: Explaining AI with Narratives},
editor = {Ute Schmid and Britta Wrede},
journal = {KI - Künstliche Intelligenz, German Journal on Artificial Intelligence - Organ des Fachbereiches "Künstliche Intelligenz" der Gesellschaft für Informatik e.V. (KI)},
volume = {36},
pages = {287-296},
publisher = {Springer},
doi = {10.1007/s13218-022-00780-8},
author = {Mareike Hartmann and Han Du and Nils Feldhus and Ivana Kruijff-Korbayová and Daniel Sonntag},
keywords = {xai, explainable ai, interaction, explanations},
url = {https://doi.org/10.1007/s13218-022-00780-8 https://www.dfki.de/fileadmin/user_upload/import/13116_s13218-022-00780-8.pdf}
}
H. M. D. Nguyen, D. M. Nguyen, H. Vu, B. T. Nguyen, F. Nunnari, and D. Sonntag, "An Attention Mechanism using Multiple Knowledge Sources for COVID-19 Detection from CT Images" in Proc. The Thirty-Fifth AAAI Conference on Artificial Intelligence (AAAI-21). AAAI Conference on Artificial Intelligence (AAAI), Workshop on Trustworthy AI for Healthcare, February 2-9, Vancouver, BC, Canada, 2021.
@inproceedings{pub11369, abstract = {Besides principal polymerase chain reaction (PCR) tests, automatically identifying positive samples based on computed tomography (CT) scans can present a promising option in the early diagnosis of COVID-19. Recently, there have been increasing efforts to utilize deep networks for COVID-19 diagnosis based on CT scans. While these approaches mostly focus on introducing novel architectures, transfer learning techniques or construction of large scale data, we propose a novel strategy to improve several performance baselines by leveraging multiple useful information sources relevant to doctors' judgments. Specifically, infected regions and heat-map features extracted from learned networks are integrated with the global image via an attention mechanism during the learning process. This procedure makes our system more robust to noise and guides the network focusing on local lesion areas. Extensive experiments illustrate the superior performance of our approach compared to recent baselines. Furthermore, our learned network guidance presents an explainable feature to doctors to understand the connection between input and output in a grey-box model.},
year = {2021},
title = {An Attention Mechanism using Multiple Knowledge Sources for COVID-19 Detection from CT Images},
booktitle = {The Thirty-Fifth AAAI Conference on Artificial Intelligence (AAAI-21). AAAI Conference on Artificial Intelligence (AAAI), Workshop on Trustworthy AI for Healthcare, February 2-9, Vancouver, BC, Canada},
note = {Virtual Conference},
publisher = {AAAI},
author = {Ho Minh Duy Nguyen and Duy M. Nguyen and Huong Vu and Binh T. Nguyen and Fabrizio Nunnari and Daniel Sonntag},
keywords = {Explainable AI, Covid-19, Medical Imaging},
url = {https://www.dfki.de/fileadmin/user_upload/import/11369_AAAI_Workshop_TrustworthyHealthcare_v3.pdf https://arxiv.org/abs/2009.11008}
}
A. Prange, M. Barz, A. Heimann-Steinert, and D. Sonntag, "Explainable Automatic Evaluation of the Trail Making Test for Dementia Screening" in Proc. Proceedings of the 2021 CHI Conference on Human Factors in Computing Systems. ACM International Conference on Human Factors in Computing Systems (CHI-2021), May 8-13, Yokohama, Japan, 2021.
@inproceedings{pub11432, abstract = {The Trail Making Test (TMT) is a frequently used neuropsychological test for assessing cognitive performance. The subject connects a sequence of numbered nodes by using a pen on normal paper. We present an automatic cognitive assessment tool that analyzes samples of the TMT which we record using a digital pen. This enables us to analyze digital pen features that are difficult or impossible to evaluate manually. Our system automatically measures several pen features, including the completion time which is the main performance indicator used by clinicians to score the TMT in practice. In addition, our system provides a structured report of the analysis of the test, for example indicating missed or erroneously connected nodes, thereby offering more objective, transparent and explainable results to the clinician. We evaluate our system with 40 elderly subjects from a geriatrics daycare clinic of a large hospital.},
year = {2021},
title = {Explainable Automatic Evaluation of the Trail Making Test for Dementia Screening},
booktitle = {Proceedings of the 2021 CHI Conference on Human Factors in Computing Systems. ACM International Conference on Human Factors in Computing Systems (CHI-2021), May 8-13, Yokohama, Japan},
note = {Virtual Conference},
isbn = {978-1-4503-8096-6/21/05},
publisher = {Association for Computing Machinery, New York, NY, USA},
author = {Alexander Prange and Michael Barz and Anika Heimann-Steinert and Daniel Sonntag},
url = {https://doi.org/10.1145/3411764.3445046 https://www.dfki.de/fileadmin/user_upload/import/11432_Explainable_Automatic_Evaluation_of_the_Trail_Making_Test_for_Dementia_Screening.pdf}
}
M. Barz and D. Sonntag, "Incremental Improvement of a Question Answering System by Re-ranking Answer Candidates Using Machine Learning" in Increasing Naturalness and Flexibility in Spoken Dialogue Interaction: 10th International Workshop on Spoken Dialogue Systems, Springer, Singapore.
2021.
doi: 10.1007/978-981-15-9323-9_34
@incollection{pub11522, series = {Lecture Notes in Electrical Engineering},
abstract = {We implement a method for re-ranking top-10 results of a state-of-the-art question answering (QA) system. The goal of our re-ranking approach is to improve the answer selection given the user question and the top-10 candidates. We focus on improving deployed QA systems that do not allow re-training or where re-training comes at a high cost. Our re-ranking approach learns a similarity function based on n-gram features of the query, the answer, and the initial system confidence. Our contributions are: (1) we generate a QA training corpus starting from 877 answers from the customer care domain of T-Mobile Austria, (2) we implement a state-of-the-art QA pipeline using neural sentence embeddings that encode queries in the same space as the answer index, and (3) we evaluate the QA pipeline and our re-ranking approach using a separately provided test set. The test set can be considered to be available after deployment of the system, e.g., based on feedback of users. Our results show that the system performance, in terms of top-n accuracy and the mean reciprocal rank, benefits from re-ranking using gradient boosted regression trees. On average, the mean reciprocal rank improves by 9.15%.},
year = {2021},
title = {Incremental Improvement of a Question Answering System by Re-ranking Answer Candidates Using Machine Learning},
booktitle = {Increasing Naturalness and Flexibility in Spoken Dialogue Interaction: 10th International Workshop on Spoken Dialogue Systems},
editor = {Erik Marchi and Sabato Marco Siniscalchi and Sandro Cumani and Valerio Mario Salerno and Haizhou Li},
pages = {367-379},
isbn = {9789811593239},
publisher = {Springer, Singapore},
doi = {10.1007/978-981-15-9323-9_34},
author = {Michael Barz and Daniel Sonntag},
url = {https://doi.org/10.1007/978-981-15-9323-9_34 https://www.dfki.de/fileadmin/user_upload/import/11522_2019_Incremental_Improvement_of_a_Question_Answering_System_by_Re-ranking_Answer_Candidates_using_Machine_Learning.pdf https://arxiv.org/abs/1908.10149}
}
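The re-ranking step above can be imitated in a few lines (a sketch with hypothetical features, not the deployed system): a gradient boosted regression model scores each (query, candidate) pair and the top-10 list is re-ordered by that score.

    # Re-ranking top-10 answer candidates with gradient boosted trees; toy features.
    import numpy as np
    from sklearn.ensemble import GradientBoostingRegressor

    rng = np.random.default_rng(0)
    # Hypothetical per-pair features, e.g. n-gram overlap and initial confidence.
    X_train = rng.random((500, 3))
    y_train = (X_train[:, 0] + 0.5 * X_train[:, 2] > 0.8).astype(float)

    reranker = GradientBoostingRegressor().fit(X_train, y_train)

    top10 = rng.random((10, 3))                    # features of one query's top-10
    order = np.argsort(-reranker.predict(top10))   # higher score -> better rank
    print("re-ranked candidate indices:", order)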
S. Kapp, M. Barz, S. Mukhametov, D. Sonntag, and J. Kuhn, "ARETT: Augmented Reality Eye Tracking Toolkit for Head Mounted Displays" Sensors - Open Access Journal (Sensors), vol. 21, iss. 6, p. 18.
2021.
doi: 10.3390/s21062234
@article{pub11528, abstract = {Currently an increasing number of head mounted displays (HMD) for virtual and augmented reality (VR/AR) are equipped with integrated eye trackers. Use cases of these integrated eye trackers include rendering optimization and gaze-based user interaction. In addition, visual attention in VR and AR is interesting for applied research based on eye tracking in cognitive or educational sciences for example. While some research toolkits for VR already exist, only a few target AR scenarios. In this work, we present an open-source eye tracking toolkit for reliable gaze data acquisition in AR based on Unity 3D and the Microsoft HoloLens 2, as well as an R package for seamless data analysis. Furthermore, we evaluate the spatial accuracy and precision of the integrated eye tracker for fixation targets with different distances and angles to the user (n=21). On average, we found that gaze estimates are reported with an angular accuracy of 0.83 degrees and a precision of 0.27 degrees while the user is resting, which is on par with state-of-the-art mobile eye trackers.},
number = {6},
year = {2021},
title = {ARETT: Augmented Reality Eye Tracking Toolkit for Head Mounted Displays},
journal = {Sensors - Open Access Journal (Sensors)},
volume = {21},
pages = {18},
publisher = {Multidisciplinary Digital Publishing Institute (MDPI)},
doi = {10.3390/s21062234},
author = {Sebastian Kapp and Michael Barz and Sergey Mukhametov and Daniel Sonntag and Jochen Kuhn},
keywords = {accuracy, augmented reality, eye tracking, precision, toolkit},
url = {https://www.mdpi.com/1424-8220/21/6/2234 https://www.dfki.de/fileadmin/user_upload/import/11528_2021_ARETT-_Augmented_Reality_Eye_Tracking_Toolkit_for_Head_Mounted_Displays.pdf}
}
H. Profitlich and D. Sonntag, "A Case Study on Pros and Cons of Regular Expression Detection and Dependency Parsing for Negation Extraction from German Medical Documents. Technical Report" BMBF, Bundesministerium für Bildung und Forschung Kapelle-Ufer 1 D-10117 Berlin, Technical Report, 2021.
@techreport{pub11611, series = {DFKI Research Reports, RR},
abstract = {We describe our work on information extraction from medical documents written in German, especially detecting negations using an architecture based on the UIMA pipeline. Based on our previous work on software modules to cover medical concepts like diagnoses, examinations, etc., we employ a version of the NegEx regular expression algorithm with a large set of triggers as a baseline. We show how a significantly smaller trigger set is sufficient to achieve similar results, in order to reduce adaptation times to new text types. We elaborate on the question of whether dependency parsing (based on the Stanford CoreNLP model) is a good alternative and describe the potentials and shortcomings of both approaches.},
month = {5},
year = {2021},
title = {A Case Study on Pros and Cons of Regular Expression Detection and Dependency Parsing for Negation Extraction from German Medical Documents. Technical Report},
type = {Technical Report},
volume = {1},
pages = {30},
address = {Bundesministerium für Bildung und Forschung Kapelle-Ufer 1 D-10117 Berlin},
institution = {BMBF},
author = {Hans-Jürgen Profitlich and Daniel Sonntag},
keywords = {information extraction (IE); negation detection, regular expression detection, natural language processing; dependency parsing; electronic health record (EHR)},
url = {http://arxiv.org/abs/2105.09702 https://www.dfki.de/fileadmin/user_upload/import/11611_CaseStudy_TR_final.pdf}
}
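The NegEx-style baseline above boils down to trigger expressions with a bounded scope window; the sketch below uses a tiny illustrative trigger set (the project's trigger set, and its UIMA integration, are far larger).

    # Toy NegEx-style negation detection with German pre-negation triggers.
    import re

    TRIGGERS = [r"kein Hinweis auf", r"kein(?:e|en|em|er)?", r"ohne"]
    pattern = re.compile(
        r"\b(" + "|".join(TRIGGERS) + r")\b\s+((?:\w+\s?){1,5})", re.IGNORECASE)

    def negated_spans(text):
        """Return (trigger, scope) pairs; the scope is capped at five tokens."""
        return [(m.group(1), m.group(2).strip()) for m in pattern.finditer(text)]

    print(negated_spans("Kein Hinweis auf Pneumonie. Patient ohne Fieber."))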
D. Sonntag, "Künstliche Intelligenz in der Medizin und Gynäkologie – Holzweg oder Heilversprechen?" Der Gynäkologe, vol. 1.
2021.
@article{pub11612, abstract = {Artificial intelligence (AI) has attained a new level of maturity in recent years and is becoming the driver of digitalization in all areas of life. AI is a cross-sectional technology with great importance for all areas of medicine employing image data, text data and bio-data. There is no medical field that will remain unaffected by AI, with AI-assisted clinical decision-making assuming a particularly important role. AI methods are becoming established in medical workflow management and for prediction of treatment success or treatment outcome. AI systems are already able to lend support to imaging-based diagnosis and patient management, but cannot suggest critical decisions. The corresponding preventive or therapeutic measures can be more rationally assessed with the help of AI, although the number of diseases covered is currently too low to create robust systems for routine clinical use. Prerequisite for the widespread use of AI systems is appropriate training to enable physicians to decide when computer-assisted decision-making can be relied upon.},
month = {4},
year = {2021},
title = {Künstliche Intelligenz in der Medizin und Gynäkologie – Holzweg oder Heilversprechen?},
journal = {Der Gynäkologe},
volume = {1},
pages = {1-7},
publisher = {Springer},
author = {Daniel Sonntag},
keywords = {AI-assisted clinical decision-making, imaging-based diagnosis, robust systems for routine clinical use},
url = {https://www.dfki.de/fileadmin/user_upload/import/11612_sonntag-gyn.pdf https://link.springer.com/article/10.1007/s00129-021-04808-2}
}
E. Somfai, B. Baffy, K. Fenech, C. Guo, R. Hosszú, D. Korózs, F. Nunnari, M. Pólik, D. Sonntag, A. Ulbert, and A. Lőrincz, "Minimizing false negative rate in melanoma detection and providing insight into the causes of classification" Computing Research Repository eprint Journal (CoRR), vol. abs/2102.09199.
2021.
@article{pub11613, abstract = {Our goal is to bridge human and machine intelligence in melanoma detection. We develop a classification system exploiting a combination of visual pre-processing, deep learning, and ensembling for providing explanations to experts and to minimize false negative rate while maintaining high accuracy in melanoma detection. Source images are first automatically segmented using a U-net CNN. The result of the segmentation is then used to extract image sub-areas and specific parameters relevant in human evaluation, namely center, border, and asymmetry measures. These data are then processed by tailored neural networks which include structure searching algorithms. Partial results are then ensembled by a committee machine. Our evaluation on the largest skin lesion dataset which is publicly available today, ISIC-2019, shows improvement in all evaluated metrics over a baseline using the original images only. We also showed that indicative scores computed by the feature classifiers can provide useful insight into the various features on which the decision can be based.},
year = {2021},
title = {Minimizing false negative rate in melanoma detection and providing insight into the causes of classification},
journal = {Computing Research Repository eprint Journal (CoRR)},
volume = {abs/2102.09199},
pages = {1-14},
publisher = {arXiv},
author = {Ellák Somfai and Benjámin Baffy and Kristian Fenech and Changlu Guo and Rita Hosszú and Dorina Korózs and Fabrizio Nunnari and Marcell Pólik and Daniel Sonntag and Attila Ulbert and András Lőrincz},
url = {https://arxiv.org/abs/2102.09199 https://www.dfki.de/fileadmin/user_upload/import/11613_2021_Minimizing_false_negative_rate_in_melanoma_detection_and_providing_insight_into_the_causes_of_classification.pdf}
}
M. Barz, S. Kapp, J. Kuhn, and D. Sonntag, "Automatic Recognition and Augmentation of Attended Objects in Real-Time Using Eye Tracking and a Head-Mounted Display" in Proc. ACM Symposium on Eye Tracking Research and Applications. Symposium on Eye Tracking Research & Applications (ETRA-2021), May 24-27, Virtual, Germany, 2021, p. 4.
doi: 10.1145/3450341.3458766
@inproceedings{pub11614, series = {ETRA '21 Adjunct},
abstract = {Scanning and processing visual stimuli in a scene is essential for the human brain to make situation-aware decisions. Adding the ability to observe the scanning behavior and scene processing to intelligent mobile user interfaces can facilitate a new class of cognition-aware user interfaces. As a first step in this direction, we implement an augmented reality (AR) system that classifies objects at the user’s point of regard, detects visual attention to them, and augments the real objects with virtual labels that stick to the objects in real-time. We use a head-mounted AR device (Microsoft HoloLens 2) with integrated eye tracking capabilities and a front-facing camera for implementing our prototype.},
month = {5},
year = {2021},
title = {Automatic Recognition and Augmentation of Attended Objects in Real-Time Using Eye Tracking and a Head-Mounted Display},
booktitle = {ACM Symposium on Eye Tracking Research and Applications. Symposium on Eye Tracking Research & Applications (ETRA-2021), May 24-27, Virtual, Germany},
pages = {4},
isbn = {9781450383578},
publisher = {Association for Computing Machinery, New York, NY, USA},
doi = {10.1145/3450341.3458766},
author = {Michael Barz and Sebastian Kapp and Jochen Kuhn and Daniel Sonntag},
keywords = {computer vision, visual attention, cognition-aware computing, eye tracking, augmented reality},
url = {https://doi.org/10.1145/3450341.3458766 https://www.dfki.de/fileadmin/user_upload/import/11614_etra_ar_video.pdf}
}
O. S. Bhatti, M. Barz, and D. Sonntag, "EyeLogin - Calibration-Free Authentication Method for Public Displays Using Eye Gaze" in Proc. ACM Symposium on Eye Tracking Research and Applications. Symposium on Eye Tracking Research & Applications (ETRA-2021), May 24-27, Virtual, Germany, 2021.
doi: 10.1145/3448018.3458001
@inproceedings{pub11616, series = {ETRA '21 Short Papers},
abstract = {The usage of interactive public displays has increased, including the number of sensitive applications and, hence, the demand for user authentication methods. In this context, gaze-based authentication was shown to be effective and more secure, but significantly slower than touch- or gesture-based methods. We implement a calibration-free and fast authentication method for situated displays based on saccadic eye movements. In a user study (n = 10), we compare our new method with CueAuth from Khamis et al. (IMWUT’18), an authentication method based on smooth pursuit eye movements. The results show a significant improvement in accuracy from 82.94% to 95.88%. At the same time, we found that the entry time can be reduced considerably with our method, from 18.28 s to 5.12 s on average, which is comparable to touch-based input.},
year = {2021},
title = {EyeLogin - Calibration-Free Authentication Method for Public Displays Using Eye Gaze},
booktitle = {ACM Symposium on Eye Tracking Research and Applications. Symposium on Eye Tracking Research & Applications (ETRA-2021), May 24-27, Virtual, Germany},
isbn = {9781450383455},
publisher = {Association for Computing Machinery},
doi = {10.1145/3448018.3458001},
author = {Omair Shahzad Bhatti and Michael Barz and Daniel Sonntag},
keywords = {Calibration-free Eye Tracking, Authentication, Gaze-based Interaction, Eye Tracking, Public Displays},
url = {https://doi.org/10.1145/3448018.3458001 https://www.dfki.de/fileadmin/user_upload/import/11616_EyeLogin.pdf}
}
F. Nunnari and D. Sonntag, "A Software Toolbox for Deploying Deep Learning Decision Support Systems with XAI Capabilities" in Proc. Companion of the 2021 ACM SIGCHI Symposium on Engineering Interactive Computing Systems. ACM SIGCHI Symposium on Engineering Interactive Computing Systems (EICS-2021), June 8-11, Eindhoven/Virtual, Netherlands, 2021.
doi: 10.1145/3459926.3464753
@inproceedings{pub11664, series = {EICS '21},
abstract = {We describe the software architecture of a toolbox of reusable components for the configuration of convolutional neural networks (CNNs) for classification and labeling problems. The toolbox architecture has been designed to maximize the reuse of established algorithms and to include domain experts in the development and evaluation process across different projects and challenges. In addition, we implemented easy-to-edit input formats and modules for XAI (eXplainable AI) through visual inspection capabilities. The toolbox is available for the research community to implement applied artificial intelligence projects.},
year = {2021},
title = {A Software Toolbox for Deploying Deep Learning Decision Support Systems with XAI Capabilities},
booktitle = {Companion of the 2021 ACM SIGCHI Symposium on Engineering Interactive Computing Systems. ACM SIGCHI Symposium on Engineering Interactive Computing Systems (EICS-2021), June 8-11, Eindhoven/Virtual, Netherlands},
isbn = {9781450384490},
publisher = {Association for Computing Machinery},
doi = {10.1145/3459926.3464753},
author = {Fabrizio Nunnari and Daniel Sonntag},
keywords = {design patterns, object-oriented architecture, deep learning, software toolbox, convolutional neural networks, explainable AI},
url = {https://www.dfki.de/fileadmin/user_upload/import/11664_nunnari21EICS-TIML.pdf https://doi.org/10.1145/3459926.3464753}
}
M. Barz and D. Sonntag, "Automatic Visual Attention Detection for Mobile Eye Tracking Using Pre-Trained Computer Vision Models and Human Gaze" Sensors - Open Access Journal (Sensors), vol. 21, iss. 12, p. 21.
2021.
doi: 10.3390/s21124143
@article{pub11668, abstract = {Processing visual stimuli in a scene is essential for the human brain to make situation-aware decisions. These stimuli, which are prevalent subjects of diagnostic eye tracking studies, are commonly encoded as rectangular areas of interest (AOIs) per frame. Because it is a tedious manual annotation task, the automatic detection and annotation of visual attention to AOIs can accelerate and objectify eye tracking research, in particular for mobile eye tracking with egocentric video feeds. In this work, we implement two methods to automatically detect visual attention to AOIs using pre-trained deep learning models for image classification and object detection. Furthermore, we develop an evaluation framework based on the VISUS dataset and well-known performance metrics from the field of activity recognition. We systematically evaluate our methods within this framework, discuss potentials and limitations, and propose ways to improve the performance of future automatic visual attention detection methods.},
number = {12},
year = {2021},
title = {Automatic Visual Attention Detection for Mobile Eye Tracking Using Pre-Trained Computer Vision Models and Human Gaze},
journal = {Sensors - Open Access Journal (Sensors)},
volume = {21},
pages = {21},
publisher = {MDPI},
doi = {10.3390/s21124143},
author = {Michael Barz and Daniel Sonntag},
url = {https://www.mdpi.com/1424-8220/21/12/4143 https://www.dfki.de/fileadmin/user_upload/import/11668_sensors-21-04143-v2.pdf}
}
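A core step in the automatic attention detection above is mapping a gaze point onto detected objects. A minimal sketch of that mapping (hypothetical detections, not the paper's evaluation framework):

    # Assign visual attention to the detected object whose box contains the gaze.
    def attended_object(gaze, detections):
        """gaze: (x, y); detections: list of (label, (x1, y1, x2, y2), score)."""
        x, y = gaze
        hits = [d for d in detections
                if d[1][0] <= x <= d[1][2] and d[1][1] <= y <= d[1][3]]
        return max(hits, key=lambda d: d[2])[0] if hits else None

    detections = [("cup", (100, 100, 200, 200), 0.9),
                  ("phone", (150, 150, 300, 300), 0.8)]
    print(attended_object((160, 170), detections))  # -> "cup"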
A. Prange and D. Sonntag, "Assessing Cognitive Test Performance Using Automatic Digital Pen Features Analysis" in Proc. Proceedings of the 29th ACM Conference on User Modeling, Adaptation and Personalization. International Conference on User Modeling, Adaptation, and Personalization (UMAP-2021), June 21-25, Utrecht/Virtual, Netherlands, 2021.
doi: 10.1145/3450613.3456812
@inproceedings{pub11703, abstract = {Most cognitive assessments, for dementia screening for example, are conducted with a pen on normal paper. We record these tests with a digital pen as part of a new interactive cognitive assessment tool with automatic analysis of pen input. The clinician can, first, observe the sketching process in real-time on a mobile tablet, e.g., in telemedicine settings or to follow Covid-19 distancing regulations. Second, the results of an automatic test analysis are presented to the clinician in real-time, thereby reducing manual scoring effort and producing objective reports. The presented research describes the architecture of our cognitive assessment tool and examines how accurately different machine learning (ML) models can automatically score cognitive tests, without a semantic content analysis. Our system uses a set of more than 170 pen features, calculated directly from the raw digital pen signal. We evaluate our system with 40 subjects from a geriatrics daycare clinic. Using standard ML techniques our feature set outperforms previous approaches on the cognitive tests we consider, i.e., the Clock Drawing, the Rey-Osterrieth Complex Figure, and the Trail Making Test, by automatically scoring tests with up to 82% accuracy in a binary classification task.},
year = {2021},
title = {Assessing Cognitive Test Performance Using Automatic Digital Pen Features Analysis},
booktitle = {Proceedings of the 29th ACM Conference on User Modeling, Adaptation and Personalization. International Conference on User Modeling, Adaptation, and Personalization (UMAP-2021), June 21-25, Utrecht/Virtual, Netherlands},
isbn = {9781450383660},
publisher = {Association for Computing Machinery},
doi = {10.1145/3450613.3456812},
author = {Alexander Prange and Daniel Sonntag},
keywords = {Rey-Osterrieth Complex Figure, Cognitive Assessments, Clock Drawing Test, Pen Features, Digital Pen, Deep Learning, Neurocognitive Testing, Machine Learning, Trail Making Test},
url = {https://dl.acm.org/doi/10.1145/3450613.3456812}
}
H. M. D. Nguyen, T. T. Mai, N. T. T. Than, A. Prange, and D. Sonntag, "Self-Supervised Domain Adaptation for Diabetic Retinopathy Grading using Vessel Image Reconstruction" in Proc. Proceedings of the 44th German Conference on Artificial Intelligence. German Conference on Artificial Intelligence (KI-2021), September 27 - October 1, Berlin/Virtual, Germany, 2021.
@inproceedings{pub11715, abstract = {This paper investigates the problem of domain adaptation for diabetic retinopathy (DR) grading. We learn invariant target-domain features by defining a novel self-supervised task based on retinal vessel image reconstructions, inspired by medical domain knowledge. Then, a benchmark of current state-of-the-art unsupervised domain adaptation methods on the DR problem is provided. We show that our approach outperforms existing domain adaptation strategies. Furthermore, when utilizing entire training data in the target domain, we are able to compete with several state-of-the-art approaches in final classification accuracy just by applying standard network architectures and using image-level labels.},
year = {2021},
title = {Self-Supervised Domain Adaptation for Diabetic Retinopathy Grading using Vessel Image Reconstruction},
booktitle = {Proceedings of the 44th German Conference on Artificial Intelligence. German Conference on Artificial Intelligence (KI-2021), September 27 - October 1, Berlin/Virtual, Germany},
publisher = {Springer},
author = {Ho Minh Duy Nguyen and Truong Thanh-Nhat Mai and Ngoc Trong Tuong Than and Alexander Prange and Daniel Sonntag},
keywords = {Domain Adaptation, Diabetic Retinopathy, Self-Supervised Learning, Deep Learning, Interactive Machine Learning},
url = {https://www.dfki.de/fileadmin/user_upload/import/11715_KI_2021_Self_Supervised_Domain_Adaptation_for_Diabetic_Retinopathy_Grading.pdf https://link.springer.com/chapter/10.1007/978-3-030-87626-5_26}
}
F. Nunnari, M. A. Kadir, and D. Sonntag, "On the Overlap Between Grad-CAM Saliency Maps and Explainable Visual Features in Skin Cancer Images" in Proc. Machine Learning and Knowledge Extraction. International IFIP Cross Domain (CD) Conference for Machine Learning & Knowledge Extraction (MAKE) (CD-MAKE-2021), August 17-20, Virtual, 2021.
@inproceedings{pub11802, abstract = {Dermatologists recognize melanomas by inspecting images in which they identify human-comprehensible visual features. In this paper, we investigate to what extent such features correspond to the saliency areas identified on CNNs trained for classification. Our experiments, conducted on two neural architectures characterized by different depth and different resolution of the last convolutional layer, quantify to what extent thresholded Grad-CAM saliency maps can be used to identify visual features of skin cancer. We found that the best threshold value, i.e., the threshold at which we can measure the highest Jaccard index, varies significantly among features; ranging from 0.3 to 0.7. In addition, we measured Jaccard indices as high as 0.143, which is almost 50% of the performance of state-of-the-art architectures specialized in feature mask prediction at pixel-level, such as U-Net. Finally, a breakdown test between malignancy and classification correctness shows that higher resolution saliency maps could help doctors in spotting wrong classifications.},
year = {2021},
title = {On the Overlap Between Grad-CAM Saliency Maps and Explainable Visual Features in Skin Cancer Images},
booktitle = {Machine Learning and Knowledge Extraction. International IFIP Cross Domain (CD) Conference for Machine Learning & Knowledge Extraction (MAKE) (CD-MAKE-2021), August 17-20, Virtual},
editor = {Andreas Holzinger and Peter Kieseberg and A. Min Tjoa and Edgar Weippl},
volume = {12844},
pages = {241-253},
isbn = {978-3-030-84060-0},
publisher = {Springer International Publishing},
author = {Fabrizio Nunnari and Md Abdul Kadir and Daniel Sonntag},
url = {https://doi.org/10.1007/978-3-030-84060-0_16 https://www.dfki.de/fileadmin/user_upload/import/11802_2021_CD_MAKE_XAI_and_SkinFeatures.pdf}
}
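To illustrate the overlap measurement described in the abstract above: a Grad-CAM saliency map is binarized at a threshold and compared to a binary visual-feature mask via the Jaccard index, and the threshold maximizing the index is reported per feature. The following is a minimal Python sketch under the assumption that both maps are given as arrays normalized to [0, 1]; all names are illustrative and not taken from the authors' code.

import numpy as np

def jaccard_index(a: np.ndarray, b: np.ndarray) -> float:
    """Intersection over union of two binary masks."""
    intersection = np.logical_and(a, b).sum()
    union = np.logical_or(a, b).sum()
    return float(intersection) / union if union > 0 else 0.0

def best_threshold(saliency: np.ndarray, feature_mask: np.ndarray,
                   thresholds=np.arange(0.1, 1.0, 0.1)):
    """Binarize the saliency map at several thresholds and keep the one
    maximizing the Jaccard index against the annotated feature mask."""
    scores = [(t, jaccard_index(saliency >= t, feature_mask > 0.5))
              for t in thresholds]
    return max(scores, key=lambda ts: ts[1])  # (threshold, jaccard)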
F. Nunnari, H. M. T. Alam, and D. Sonntag, "Anomaly Detection for Skin Lesion Images Using Replicator Neural Networks" in Proc. Machine Learning and Knowledge Extraction. International IFIP Cross Domain (CD) Conference for Machine Learning & Knowledge Extraction (MAKE) (CD-MAKE-2021), August 17-20, Virtual, 2021.
@inproceedings{pub11803, abstract = {This paper presents an investigation on the task of anomaly detection for images of skin lesions. The goal is to provide a decision support system with an extra filtering layer to inform users if a classifier should not be used for a given sample. We tested anomaly detectors based on autoencoders and three discrimination methods: feature vector distance, replicator neural networks, and support vector data description fine-tuning. Results show that neural-based detectors can perfectly discriminate between skin lesions and open world images, but class discrimination cannot easily be accomplished and requires further investigation.},
year = {2021},
title = {Anomaly Detection for Skin Lesion Images Using Replicator Neural Networks},
booktitle = {Machine Learning and Knowledge Extraction. International IFIP Cross Domain (CD) Conference for Machine Learning & Knowledge Extraction (MAKE) (CD-MAKE-2021), August 17-20, Virtual},
editor = {Andreas Holzinger and Peter Kieseberg and A. Min Tjoa and Edgar Weippl},
volume = {12844},
pages = {225-240},
isbn = {978-3-030-84060-0},
publisher = {Springer International Publishing},
author = {Fabrizio Nunnari and Hasan Md Tusfiqur Alam and Daniel Sonntag},
url = {https://doi.org/10.1007/978-3-030-84060-0_15 https://www.dfki.de/fileadmin/user_upload/import/11803_2021_CD_MAKE_AnomalyDetection.pdf}
}
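One family of detectors listed in the abstract, autoencoder-based discrimination via reconstruction error, can be sketched as follows. This is a hypothetical minimal example in PyTorch, not the authors' architecture: after training on in-distribution skin-lesion features, samples with high reconstruction error are flagged as anomalous.

import torch
import torch.nn as nn

class AutoEncoder(nn.Module):
    """Small fully-connected autoencoder over flattened image features."""
    def __init__(self, dim=2048, bottleneck=64):
        super().__init__()
        self.encoder = nn.Sequential(nn.Linear(dim, 256), nn.ReLU(),
                                     nn.Linear(256, bottleneck))
        self.decoder = nn.Sequential(nn.Linear(bottleneck, 256), nn.ReLU(),
                                     nn.Linear(256, dim))
    def forward(self, x):
        return self.decoder(self.encoder(x))

def anomaly_scores(model: AutoEncoder, x: torch.Tensor) -> torch.Tensor:
    """Per-sample reconstruction error; a high error suggests an
    out-of-distribution input after training on in-distribution data."""
    with torch.no_grad():
        return ((model(x) - x) ** 2).mean(dim=1)

# flag samples whose error exceeds a threshold calibrated on a validation set:
# scores = anomaly_scores(model, features); anomalies = scores > threshold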
R. Biswas, M. Barz, M. Hartmann, and D. Sonntag, "Improving German Image Captions using Machine Translation and Transfer Learning" in Proc. Statistical Language and Speech Processing SLSP 2021. International Conference on Statistical Language and Speech Processing (SLSP), 8th-9th, November 22-26, Cardiff, United Kingdom, Council Chamber Glamorgan Building King Edward VII Ave Cathays Park Cardiff CF10 3WT, 2021.
@inproceedings{pub11805, series = {Lecture Notes in Computer Science / Lecture Notes in Artificial Intelligence, LNCS / LNAI},
abstract = {Image captioning is a complex artificial intelligence task that involves many fundamental questions of data representation, learning, and natural language processing. In addition, most of the work in this domain addresses the English language because of the high availability of annotated training data compared to other languages. Therefore, we investigate methods for image captioning in German that transfer knowledge from English training data. We explore four different methods for generating image captions in German, two baseline methods and two more advanced ones based on transfer learning. The baseline methods are based on a state-of-the-art model which we train using a translated version of the English MS COCO dataset and the smaller German Multi30K dataset, respectively. Both advanced methods are pre-trained using the translated MS COCO dataset and fine-tuned for German on the Multi30K dataset. One of these methods uses an alternative attention mechanism from the literature that showed a good performance in English image captioning. We compare the performance of all methods for the Multi30K test set in German using common automatic evaluation metrics. We show that our advanced method with the alternative attention mechanism presents a new baseline for German BLEU, ROUGE, CIDEr, and SPICE scores, and achieves a relative improvement of 21.2 % in BLEU-4 score compared to the current state-of-the-art in German image captioning.},
month = {11},
year = {2021},
title = {Improving German Image Captions using Machine Translation and Transfer Learning},
booktitle = {Statistical Language and Speech Processing SLSP 2021. International Conference on Statistical Language and Speech Processing (SLSP), 8th-9th, November 22-26, Cardiff, United Kingdom},
editor = {Luis Espinosa-Anke and Carlos Martin-Vide and Irena Spasic},
address = {Council Chamber Glamorgan Building King Edward VII Ave Cathays Park Cardiff CF10 3WT},
publisher = {Springer, Heidelberg},
author = {Rajarshi Biswas and Michael Barz and Mareike Hartmann and Daniel Sonntag},
keywords = {Natural language understanding and generation, Multimodal technologies, Image Captioning, Natural Language Processing},
organization = {Cardiff University},
url = {https://www.dfki.de/fileadmin/user_upload/import/11805_SLSP2021Paper.pdf}
}
R. K. Jørgensen, M. Hartmann, X. Dai, and D. Elliott, "mDAPT: Multilingual Domain Adaptive Pretraining in a Single Model" in Proc. Findings of the Association for Computational Linguistics - EMNLP 2021. Conference on Empirical Methods in Natural Language Processing (EMNLP-2021), November 7-11, Online, 2021.
@inproceedings{pub11845, abstract = {Domain adaptive pretraining, i.e. the continued unsupervised pretraining of a language model on domain-specific text, improves the modelling of text for downstream tasks within the domain. Numerous real-world applications are based on domain-specific text, e.g. working with financial or biomedical documents, and these applications often need to support multiple languages. However, large-scale domain-specific multilingual pretraining data for such scenarios can be difficult to obtain, due to regulations, legislation, or simply a lack of language- and domain-specific text. One solution is to train a single multilingual model, taking advantage of the data available in as many languages as possible. In this work, we explore the benefits of domain adaptive pretraining with a focus on adapting to multiple languages within a specific domain. We propose different techniques to compose pretraining corpora that enable a language model to both become domain-specific and multilingual. Evaluation on nine domain-specific datasets---for biomedical named entity recognition and financial sentence classification---covering seven different languages shows that a single multilingual domain-specific model can outperform the general multilingual model, and performs close to its monolingual counterpart. This finding holds across two different pretraining methods, adapter-based pretraining and full model pretraining.},
month = {11},
year = {2021},
title = {mDAPT: Multilingual Domain Adaptive Pretraining in a Single Model},
booktitle = {Findings of the Association for Computational Linguistics - EMNLP 2021. Conference on Empirical Methods in Natural Language Processing (EMNLP-2021), November 7-11, Online},
volume = {1},
pages = {3404-3418},
publisher = {Association for Computational Linguistics},
author = {Rasmus Kær Jørgensen and Mareike Hartmann and Xiang Dai and Desmond Elliott},
keywords = {domain adaptive pretraining, multilingual language model},
url = {https://aclanthology.org/2021.findings-emnlp.290.pdf https://www.dfki.de/fileadmin/user_upload/import/11845_2021.findings-emnlp.290.pdf}
}
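The core mechanism of domain adaptive pretraining, continued unsupervised masked-language-model training on in-domain text, can be sketched with the Hugging Face transformers library. The model name, the two example sentences, and the hyperparameters below are placeholders, not the paper's setup.

import torch
from torch.utils.data import DataLoader
from transformers import (AutoTokenizer, AutoModelForMaskedLM,
                          DataCollatorForLanguageModeling)

tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-cased")
model = AutoModelForMaskedLM.from_pretrained("bert-base-multilingual-cased")

# multilingual in-domain sentences (placeholder corpus)
domain_texts = ["Das EKG zeigt Vorhofflimmern.", "The patient denies chest pain."]
encodings = [tokenizer(t, truncation=True, max_length=128) for t in domain_texts]

# the collator dynamically masks 15% of the tokens in each batch
collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm_probability=0.15)
loader = DataLoader(encodings, batch_size=2, collate_fn=collator)

optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)
model.train()
for batch in loader:  # one epoch of continued pretraining
    loss = model(**batch).loss  # MLM loss on the masked tokens
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()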
M. Hartmann, M. de Lhoneux, D. Hershcovich, Y. Kementchedjhieva, L. Nielsen, C. Qiu, and A. Søgaard, "A Multilingual Benchmark for Probing Negation-Awareness with Minimal Pairs" in Proc. Proceedings of the 25th Conference on Computational Natural Language Learning (CoNLL). Conference on Computational Natural Language Learning (CoNLL-2021), November 10-11, Online, 2021.
@inproceedings{pub11846, abstract = {Negation is one of the most fundamental concepts in human cognition and language, and several natural language inference (NLI) probes have been designed to investigate pretrained language models' ability to detect and reason with negation. However, the existing probing datasets are limited to English only, and do not enable controlled probing of performance in the absence or presence of negation. In response, we present a multilingual (English, Bulgarian, German, French and Chinese) benchmark collection of NLI examples that are grammatical and correctly labeled, as a result of manual inspection and editing. We use the benchmark to probe the negation-awareness of multilingual language models and find that models that correctly predict examples with negation cues often fail to correctly predict their counter-examples {\em without} negation cues, even when the cues are irrelevant for semantic inference.},
month = {11},
year = {2021},
title = {A Multilingual Benchmark for Probing Negation-Awareness with Minimal Pairs},
booktitle = {Proceedings of the 25th Conference on Computational Natural Language Learning (CoNLL). Conference on Computational Natural Language Learning (CoNLL-2021), November 10-11, Online},
pages = {224-257},
publisher = {Association for Computational Linguistics},
author = {Mareike Hartmann and Miryam de Lhoneux and Daniel Hershcovich and Yova Kementchedjhieva and Lukas Nielsen and Chen Qiu and Anders Søgaard},
keywords = {negation, probing, multilingual},
url = {https://aclanthology.org/2021.conll-1.19/ https://www.dfki.de/fileadmin/user_upload/import/11846_2021.conll-1.19.pdf}
}
F. Nunnari, A. Ezema, and D. Sonntag, "Crop It, but Not Too Much: The Effects of Masking on the Classification of Melanoma Images" in Proc. KI 2021: Advances in Artificial Intelligence. German Conference on Artificial Intelligence (KI-2021), September 27 - October 1, Germany, 2021.
@inproceedings{pub11859, abstract = {To improve the accuracy of convolutional neural networks in discriminating between nevi and melanomas, we test nine different combinations of masking and cropping on three datasets of skin lesion images (ISIC2016, ISIC2018, and MedNode). Our experiments, confirmed by 10-fold cross-validation, show that cropping increases classification performances, but specificity decreases when cropping is applied together with masking out healthy skin regions. An analysis of Grad-CAM saliency maps shows that in fact our CNN models have the tendency to focus on healthy skin at the border when a nevus is classified.},
year = {2021},
title = {Crop It, but Not Too Much: The Effects of Masking on the Classification of Melanoma Images},
booktitle = {KI 2021: Advances in Artificial Intelligence. German Conference on Artificial Intelligence (KI-2021), September 27 - October 1, Germany},
editor = {Stefan Edelkamp and Ralf Möller and Elmar Rueckert},
pages = {179-193},
isbn = {978-3-030-87626-5},
publisher = {Springer International Publishing},
author = {Fabrizio Nunnari and Abraham Ezema and Daniel Sonntag},
keywords = {skin cancer, convolutional neural networks, image segmentation, masking, preprocessing, reducing bias},
url = {https://link.springer.com/chapter/10.1007/978-3-030-87626-5_13 https://www.dfki.de/fileadmin/user_upload/import/11859_2021_KIconference_SkinLesionMasking.pdf}
}
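The cropping and masking operations compared in this paper can be illustrated with a short NumPy sketch: crop_to_lesion keeps a margin of healthy skin around the lesion's bounding box, while mask_out_skin zeroes all healthy-skin pixels. Function names and the margin parameter are illustrative assumptions, not the authors' code.

import numpy as np

def crop_to_lesion(image: np.ndarray, mask: np.ndarray, margin: int = 10):
    """Crop the image to the bounding box of the segmentation mask,
    keeping a margin of healthy skin around the lesion."""
    ys, xs = np.where(mask > 0)
    y0, y1 = max(ys.min() - margin, 0), min(ys.max() + margin, image.shape[0])
    x0, x1 = max(xs.min() - margin, 0), min(xs.max() + margin, image.shape[1])
    return image[y0:y1, x0:x1]

def mask_out_skin(image: np.ndarray, mask: np.ndarray):
    """Zero out all healthy-skin pixels; the abstract reports that this
    can reduce specificity when combined with cropping."""
    return image * (mask > 0)[..., None]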
L. Lauer, K. Altmeyer, S. Malone, M. Barz, R. Brünken, D. Sonntag, and M. Peschel, "Investigating the Usability of a Head-Mounted Display Augmented Reality Device in Elementary School Children" Sensors - Open Access Journal (Sensors), vol. 21, iss. 19, p. 20.
2021.
doi: 10.3390/s21196623
@article{pub11866, abstract = {Augmenting reality via head-mounted displays (HMD-AR) is an emerging technology in education. The interactivity provided by HMD-AR devices is particularly promising for learning, but presents a challenge to human activity recognition, especially with children. Recent technological advances regarding speech and gesture recognition concerning Microsoft’s HoloLens 2 may address this prevailing issue. In a within-subjects study with 47 elementary school children (2nd to 6th grade), we examined the usability of the HoloLens 2 using a standardized tutorial on multimodal interaction in AR. The overall system usability was rated “good”. However, several behavioral metrics indicated that specific interaction modes differed in their efficiency. The results are of major importance for the development of learning applications in HMD-AR as they partially deviate from previous findings. In particular, the well-functioning recognition of children’s voice commands that we observed represents a novelty. Furthermore, we found different interaction preferences in HMD-AR among the children. We also found the use of HMD-AR to have a positive effect on children’s activity-related achievement emotions. Overall, our findings can serve as a basis for determining general requirements, possibilities, and limitations of the implementation of educational HMD-AR environments in elementary school classrooms.},
number = {19},
year = {2021},
title = {Investigating the Usability of a Head-Mounted Display Augmented Reality Device in Elementary School Children},
journal = {Sensors - Open Access Journal (Sensors)},
volume = {21},
pages = {20},
publisher = {MDPI},
doi = {10.3390/s21196623},
author = {Luisa Lauer and Kristin Altmeyer and Sarah Malone and Michael Barz and Roland Brünken and Daniel Sonntag and Markus Peschel},
url = {https://www.mdpi.com/1424-8220/21/19/6623 https://www.dfki.de/fileadmin/user_upload/import/11866_sensors-21-06623.pdf}
}
M. Hartmann, I. Kruijff-Korbayová, and D. Sonntag, Interaction with Explanations in the XAINES Project.
@misc{pub11867, abstract = {AI systems are increasingly pervasive and their large-scale adoption makes it necessary to explain their behaviour, for example to their users who are impacted by their decisions, or to their developers who need to ensure their functionality. This requires, on the one hand, to obtain an accurate representation of the chain of events that caused the system to behave in a certain way (e.g., to make a specific decision). On the other hand, this causal chain needs to be communicated to the users depending on their needs and expectations. In this phase of explanation delivery, allowing interaction between user and model has the potential to improve both model quality and user experience. In this abstract, we present our planned and on-going work on the interaction with explanations as part of the XAINES project. The project investigates the explanation of AI systems through narratives targeted to the needs of a specific audience, and our work focuses on the question of how and in which way human-model interaction can enable successful explanation.},
month = {9},
year = {2021},
title = {Interaction with Explanations in the XAINES Project},
booktitle = {Trustworthy AI in the wild},
howpublished = {Trustworthy AI in the Wild Workshop 2021},
publisher = {-},
author = {Mareike Hartmann and Ivana Kruijff-Korbayová and Daniel Sonntag},
url = {https://www.dfki.de/fileadmin/user_upload/import/11867_AI_in_the_wild__Xaines.pdf}
}
S. Malone, K. Altmeyer, M. Barz, L. Lauer, D. Sonntag, M. Peschel, and R. Brünken, Measuring Intrinsic and Extraneous Cognitive Load in Elementary School Students Using Subjective Ratings and Smart Pen Data.
@misc{pub11868, abstract = {New methods are constantly being developed to optimize and adapt cognitive load measurement to different contexts (Korbach et al., 2018). It is noteworthy, however, that research on cognitive load measurement in elementary school students is rare. Although there is some evidence that they might be able to report their total cognitive load (Ayres, 2006), there are also reasons to doubt the quality of children’s self-reports (e.g., Chambers & Johnson, 2002). To avoid these issues, behavioral and objective online-measures are promising. A novel approach – the use of smartpen data generated by natural use of a pen during task completion – seems particularly encouraging as these measures proved to be predictive of cognitive load in adults (e.g., Yu, Epps, & Chen, 2011). Moreover, Barz et al. (2020) demonstrated the predictive power of smartpen data for performance in children. The present research addressed two prevailing gaps in research on cognitive load assessment in elementary school students. We developed a subjective rating scale and investigated whether this instrument can provide valid measurements of ICL and ECL (Research Question 1). Moreover, we researched whether smartpen data can be used as a valid process measurement of cognitive load (Research Question 2).},
year = {2021},
title = {Measuring Intrinsic and Extraneous Cognitive Load in Elementary School Students Using Subjective Ratings and Smart Pen Data},
howpublished = {13th International Cognitive Load Theory Conference},
author = {Sarah Malone and Kristin Altmeyer and Michael Barz and Luisa Lauer and Daniel Sonntag and Markus Peschel and Roland Brünken},
url = {https://www.dfki.de/fileadmin/user_upload/import/11868_Cl_measurement_in_children.pdf}
}
K. Altmeyer, S. Malone, S. Kapp, M. Barz, L. Lauer, M. Thees, J. Kuhn, M. Peschel, D. Sonntag, and R. Brünken, The effect of augmented reality on global coherence formation processes during STEM laboratory work in elementary school children.
@misc{pub11870, abstract = {In science education, hands-on student experiments are used to explore cause and effect relationships. Conventional lab work requires students to interact with physical experimentation objects and observe additional information like measurement values to deduce scientific laws and interrelations. This information, however, is usually presented at a physical distance from the setting, e.g., on a separate display of a measuring device. The resulting spatial split (Chandler & Sweller, 1991) between representations hampers global coherence formation (Seufert & Brünken, 2004): Mapping processes between the spatially distant sources of information are assumed to lead to an increase in extraneous cognitive load (ECL; Ayres & Sweller, 2014). Consequently, learning outcomes can be impaired (Kalyuga et al., 1999). Augmented Reality (AR) can be used to overcome the split-attention effect by allowing additional information to be virtually integrated into the real-world set-up (Azuma, 1997). A study by Altmeyer et al. (2020) with university students showed that AR-support during experimentation led to a higher conceptual knowledge gain but had no effect on ECL. The current study provides a conceptual replication of Altmeyer et al.'s (2020) research and focuses on three main objectives: First, we aimed at investigating the generalizability of the advantage of AR on experimental learning in a sample of elementary school children. Second, we examined whether children's low prior knowledge even amplifies the split-attention effect, as proposed by Kalyuga et al. (1998). Finally, we focused on obtaining deeper insights into global coherence formation processes during lab work using specific tests and eye tracking measures.},
year = {2021},
title = {The effect of augmented reality on global coherence formation processes during STEM laboratory work in elementary school children},
howpublished = {13th International Cognitive Load Theory Conference},
author = {Kristin Altmeyer and Sarah Malone and Sebastian Kapp and Michael Barz and Luisa Lauer and Michael Thees and Jochen Kuhn and Markus Peschel and Daniel Sonntag and Roland Brünken},
url = {https://www.dfki.de/fileadmin/user_upload/import/11870_ICLTC_2021_Altmeyer_final.pdf}
}
K. Altmeyer, S. Malone, S. Kapp, M. Barz, L. Lauer, M. Thees, J. Kuhn, M. Peschel, D. Sonntag, and R. Brünken, Augmented Reality zur Förderung globaler Kohärenzbildungsprozesse beim Experimentieren im Sachunterricht.
@misc{pub11871, abstract = {Augmented Reality (AR) can be classified, as a form of virtual environment, within mixed reality on a reality-virtuality continuum (Milgram & Kishino, 1994). AR extends reality by integrating virtual objects. A promising field of application for AR in education is technology-supported experimentation: experiments are an essential feature of the natural sciences and are used in STEM instruction to investigate relationships. Previous research indicates that children can develop scientific reasoning and the ability to experiment as early as elementary school age (e.g., Osterhaus et al., 2015). To derive cause-and-effect relationships from an experiment, learners usually have to mentally link real information from the experimentation environment with virtual information, such as measurement values on displays. According to the Cognitive Theory of Multimedia Learning (Mayer, 2005) and Cognitive Load Theory (Sweller et al., 1998), linking spatially separated information places particular demands on working memory. AR can be used to present real and virtual information in an integrated way during experimentation. Previous results (e.g., Altmeyer et al., 2020) imply that AR supports global coherence formation (Seufert & Brünken, 2004) and can lead to better learning outcomes. The present study investigated the effect of AR support during experimentation in a sample of elementary school children. After a prior-knowledge test, 59 children conducted experiments on electric circuits. One group was shown real-time current measurements in a table on a separate tablet screen, whereas the AR-supported group saw the measurements integrated into the experimentation environment when looking through a tablet camera. The children's eye movements were recorded during experimentation. Afterwards, both groups completed posttests that varied in their demands on global coherence formation between real and virtual elements of the experiment. First results show that children benefit from the AR environment especially on tasks that require strong global coherence. Eye-movement analyses are expected to provide further insight into the process of coherence formation during experimentation in AR.},
year = {2021},
title = {Augmented Reality zur Förderung globaler Kohärenzbildungsprozesse beim Experimentieren im Sachunterricht},
howpublished = {Tagung der Fachgruppe Pädagogische Psychologie},
author = {Kristin Altmeyer and Sarah Malone and Sebastian Kapp and Michael Barz and Luisa Lauer and Michael Thees and Jochen Kuhn and Markus Peschel and Daniel Sonntag and Roland Brünken},
url = {https://www.dfki.de/fileadmin/user_upload/import/11871_v3_Altmeyer_VR_Symposium_PAEPSY_2021.pdf}
}
A. Prange and D. Sonntag, "A Demonstrator for Interactive Image Clustering and Fine-Tuning Neural Networks in Virtual Reality" in Proc. KI 2021: Advances in Artificial Intelligence. German Conference on Artificial Intelligence (KI-2021), Germany, 2021.
@inproceedings{pub11886, abstract = {We present a virtual reality (VR) application that enables us to interactively explore and manipulate image clusters based on layer activations of convolutional neural networks (CNNs). We apply dimensionality reduction techniques to project images into the 3D space, where the user can directly interact with the model. The user can change the position of an image by using natural hand gestures. This manipulation triggers additional training steps of the network, based on the new spatial information and new label of the image. After the training step is finished, the visualization is updated according to the new output of the CNN. The goal is to visualize and improve the cluster output of the model, and at the same time, to improve the understanding of the model. We discuss two different approaches for calculating the VR projection, a combined PCA/t-SNE dimensionality reduction based approach and a variational auto-encoder (VAE) based approach.},
year = {2021},
title = {A Demonstrator for Interactive Image Clustering and Fine-Tuning Neural Networks in Virtual Reality},
booktitle = {KI 2021: Advances in Artificial Intelligence. German Conference on Artificial Intelligence (KI-2021), Germany},
editor = {Stefan Edelkamp and Ralf Möller and Elmar Rueckert},
pages = {194-203},
isbn = {978-3-030-87626-5},
publisher = {Springer International Publishing},
author = {Alexander Prange and Daniel Sonntag},
keywords = {Virtual Reality, Deep Learning, Convolutional Neural Network, Variational Auto-Encoder, PCA, t-SNE},
url = {https://link.springer.com/chapter/10.1007/978-3-030-87626-5_14}
}
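The first of the two projection approaches mentioned in the abstract, the combined PCA/t-SNE dimensionality reduction, can be sketched with scikit-learn; the activation matrix and the component counts below are placeholders, not the demonstrator's actual configuration.

import numpy as np
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# activations: (n_images, n_features) CNN layer activations (placeholder data)
activations = np.random.rand(500, 2048)

# reduce dimensionality with PCA first, then embed into 3D with t-SNE
compact = PCA(n_components=50).fit_transform(activations)
coords_3d = TSNE(n_components=3, perplexity=30).fit_transform(compact)
# coords_3d now holds one (x, y, z) position per image for the VR scene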
F. Erlemeyer, C. Rehtanz, A. Hermanns, B. Lüers, M. Nebel-Wenner, and R. J. Eilers, "Live Testing of Flexibilities on Distribution Grid Level – Simulation Setup and Lessons Learned" in Proc. IEEE Electric Power and Energy Conference. IEEE Electric Power and Energy Conference (EPEC-2021), October 22-31, Toronto, Ontario, Canada, IEEE Operations Center 445 Hoes Lane Piscataway, NJ 08854-4141 USA Phone: +1 732 981 0060, 2021.
@inproceedings{pub11927, abstract = {In the DESIGNETZ project, real flexibility units were connected to a distribution grid simulation to investigate the integration of decentralized flexibilities for different use cases. The simulation determines the demand for unit flexibility and communicates the demand to the flexibilities. In return, the response of the flexibilities is fed back into the simulation to also account for effects that are not simulated. This paper presents the simulation setup and discusses lessons learnt from deploying the simulation into operation.},
month = {10},
year = {2021},
title = {Live Testing of Flexibilities on Distribution Grid Level – Simulation Setup and Lessons Learned},
booktitle = {IEEE Electric Power and Energy Conference. IEEE Electric Power and Energy Conference (EPEC-2021), October 22-31, Toronto, Ontario, Canada},
address = {IEEE Operations Center 445 Hoes Lane Piscataway, NJ 08854-4141 USA Phone: +1 732 981 0060},
publisher = {IEEE Xplore},
author = {Fabian Erlemeyer and Christian Rehtanz and Annegret Hermanns and Bengt Lüers and Marvin Nebel-Wenner and Reef Janes Eilers},
keywords = {flexibility, real-world application, active distribution grids, congestion management, energy system simulation},
organization = {IEEE},
url = {https://www.dfki.de/fileadmin/user_upload/import/11927_2021199998.pdf https://ieeexplore.ieee.org/document/9621559}
}
M. Barz, O. S. Bhatti, B. Lüers, A. Prange, and D. Sonntag, "Multisensor-Pipeline: A Lightweight, Flexible, and Extensible Framework for Building Multimodal-Multisensor Interfaces" in Proc. Companion Publication of the 2021 International Conference on Multimodal Interaction. ACM International Conference on Multimodal Interaction (ICMI-2021), October 18-22, Montréal, QC, Canada, 2021.
doi: 10.1145/3461615.3485432
@inproceedings{pub11981, abstract = {We present the multisensor-pipeline (MSP), a lightweight, flexible, and extensible framework for prototyping multimodal-multisensor interfaces based on real-time sensor input. Our open-source framework (available on GitHub) enables researchers and developers to easily integrate multiple sensors or other data streams via source modules, to add stream and event processing capabilities via processor modules, and to connect user interfaces or databases via sink modules in a graph-based processing pipeline. Our framework is implemented in Python with a low number of dependencies, which enables a quick setup process, execution across multiple operating systems, and direct access to cutting-edge machine learning libraries and models. We showcase the functionality and capabilities of MSP through a sample application that connects a mobile eye tracker to classify image patches surrounding the user’s fixation points and visualizes the classification results in real-time.},
year = {2021},
title = {Multisensor-Pipeline: A Lightweight, Flexible, and Extensible Framework for Building Multimodal-Multisensor Interfaces},
booktitle = {Companion Publication of the 2021 International Conference on Multimodal Interaction. ACM International Conference on Multimodal Interaction (ICMI-2021), October 18-22, Montréal, QC, Canada},
pages = {13-18},
isbn = {9781450384711},
publisher = {Association for Computing Machinery, New York, NY, USA},
doi = {https://doi.org/10.1145/3461615.3485432},
author = {Michael Barz and Omair Shahzad Bhatti and Bengt Lüers and Alexander Prange and Daniel Sonntag},
url = {https://www.dfki.de/fileadmin/user_upload/import/11981_icmi_cr.pdf https://dl.acm.org/doi/10.1145/3461615.3485432}
}
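The source-processor-sink pattern that MSP builds on can be illustrated conceptually with plain Python queues and threads. Note that this is not the MSP API itself, only a sketch of the underlying graph-based pipeline idea.

import queue
import threading

class Source(threading.Thread):
    """Emits sensor samples into an output queue (here: synthetic data)."""
    def __init__(self, out_q):
        super().__init__(daemon=True)
        self.out_q = out_q
    def run(self):
        for i in range(10):
            self.out_q.put({"gaze": (i * 0.1, i * 0.2)})
        self.out_q.put(None)  # end-of-stream marker

def processor(in_q, out_q):
    """Transforms each sample; a real module might classify image patches."""
    while (sample := in_q.get()) is not None:
        out_q.put({"fixated_region": sample["gaze"]})
    out_q.put(None)

def sink(in_q):
    """Consumes results, e.g. a live visualization or a database writer."""
    while (result := in_q.get()) is not None:
        print(result)

q1, q2 = queue.Queue(), queue.Queue()
Source(q1).start()
threading.Thread(target=processor, args=(q1, q2), daemon=True).start()
sink(q2)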
D. Sonntag, "Künstliche Intelligenz gegen das Coronavirus" DFKI, BMBF, BMG2020.
@techreport{pub10809, series = {DFKI Technical Memos, TM},
abstract = {Artificial intelligence (AI) has reached a new stage of maturity in recent years and is becoming the driver of digitalisation in all areas of life. AI is a cross-sectional technology of great importance for all areas of medicine that involve image data, text data, and biodata. There is no medical field that will not be affected by AI (see also http://www.dfki.de/MedicalCPS/?p=1111). Here, four fields in the fight against the coronavirus are examined: (1) image-based diagnostics, (2) gene sequencing, (3) the automatic analysis of medical texts, and (4) disaster management.},
year = {2020},
title = {Künstliche Intelligenz gegen das Coronavirus},
volume = {1},
institution = {DFKI, BMBF, BMG},
author = {Daniel Sonntag},
keywords = {image-based diagnostics, gene sequencing, NLP, disaster management},
url = {https://www.dfki.de/fileadmin/user_upload/import/10809_corona2.pages.pdf}
}
M. Barz, S. Stauden, and D. Sonntag, "Visual Search Target Inference in Natural Interaction Settings with Machine Learning" in Proc. ACM Symposium on Eye Tracking Research and Applications. Symposium on Eye Tracking Research & Applications (ETRA-2020), Stuttgart, Germany, 2020.
doi: 10.1145/3379155.3391314
@inproceedings{pub10893, abstract = {Visual search is a perceptual task in which humans aim at identifying a search target object such as a traffic sign among other objects. Search target inference subsumes computational methods for predicting this target by tracking and analyzing overt behavioral cues of that person, e.g., the human gaze and fixated visual stimuli. We present a generic approach to inferring search targets in natural scenes by predicting the class of the surrounding image segment. Our method encodes visual search sequences as histograms of fixated segment classes determined by SegNet, a deep learning image segmentation model for natural scenes. We compare our sequence encoding and model training (SVM) to a recent baseline from the literature for predicting the target segment. Also, we use a new search target inference dataset. The results show that, first, our new segmentation-based sequence encoding outperforms the method from the literature, and second, that it enables target inference in natural settings.},
month = {5},
year = {2020},
title = {Visual Search Target Inference in Natural Interaction Settings with Machine Learning},
booktitle = {ACM Symposium on Eye Tracking Research and Applications. Symposium on Eye Tracking Research & Applications (ETRA-2020), Stuttgart, Germany},
editor = {Andreas Bulling and Anke Huckauf and Eakta Jain and Ralph Radach and Daniel Weiskopf},
publisher = {Association for Computing Machinery, New York, NY, USA},
doi = {https://doi.org/10.1145/3379155.3391314},
author = {Michael Barz and Sven Stauden and Daniel Sonntag},
keywords = {Machine Learning, Search Target Inference, Mobile Eyetracking, Visual Attention},
url = {https://dl.acm.org/doi/10.1145/3379155.3391314}
}
M. Barz, K. Altmeyer, S. Malone, L. Lauer, and D. Sonntag, "Digital Pen Features Predict Task Difficulty and User Performance of Cognitive Tests" in Proc. Proceedings of the 28th ACM Conference on User Modeling, Adaptation and Personalization. International Conference on User Modeling, Adaptation, and Personalization (UMAP-2020), July 12-18, Genoa, Italy, 2020.
@inproceedings{pub10894, abstract = {Digital pen signals were shown to be predictive for cognitive states, cognitive load and emotion in educational settings. We investigate whether low-level pen-based features can predict the difficulty of tasks in a cognitive test and the learner's performance in these tasks, which is inherently related to cognitive load, without a semantic content analysis. We record data for tasks of varying difficulty in a controlled study with children from elementary school. We include two versions of the Trail Making Test (TMT) and six drawing patterns from the Snijders-Oomen Non-verbal intelligence test (SON) as tasks that feature increasing levels of difficulty. We examine how accurately we can predict the task difficulty and the user performance as a measure for cognitive load using support vector machines and gradient boosted decision trees with different feature selection strategies. The results show that our correlation-based feature selection is beneficial for model training, in particular when samples from TMT and SON are concatenated for joint modelling of difficulty and time. Our findings open up opportunities for technology-enhanced adaptive learning.},
month = {7},
year = {2020},
title = {Digital Pen Features Predict Task Difficulty and User Performance of Cognitive Tests},
booktitle = {Proceedings of the 28th ACM Conference on User Modeling, Adaptation and Personalization. International Conference on User Modeling, Adaptation, and Personalization (UMAP-2020), July 12-18, Genoa, Italy},
publisher = {ACM},
author = {Michael Barz and Kristin Altmeyer and Sarah Malone and Luisa Lauer and Daniel Sonntag},
url = {https://www.dfki.de/fileadmin/user_upload/import/10894_digital_pen_predicts_task_performance.pdf https://dl.acm.org/doi/abs/10.1145/3340631.3394839}
}
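A correlation-based feature selection combined with a support vector machine, as evaluated in the paper above, can be sketched as follows; the random data, the feature count, and k are placeholders, not the study's actual pen features.

import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

def correlation_select(X: np.ndarray, y: np.ndarray, k: int = 20):
    """Rank features by absolute Pearson correlation with the target
    and return the indices of the top k."""
    corr = np.array([abs(np.corrcoef(X[:, j], y)[0, 1])
                     for j in range(X.shape[1])])
    return np.argsort(corr)[::-1][:k]

# X: low-level pen features (speed, pressure, ...); y: task difficulty labels
X, y = np.random.rand(200, 50), np.random.randint(0, 3, 200)
selected = correlation_select(X, y, k=20)
scores = cross_val_score(SVC(), X[:, selected], y, cv=5)
print(scores.mean())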
D. Sonntag, F. Nunnari, and H. Profitlich, "The Skincare project, an interactive deep learning system for differential diagnosis of malignant skin lesions." BMBF, H2020, Bundesministerium für Bildung und Forschung Kapelle-Ufer 1 D-10117 Berlin, Technical Report, 2020.
@techreport{pub10912, series = {DFKI Research Reports, RR},
abstract = {A shortage of dermatologists causes long wait times for patients who seek dermatologic care. In addition, the diagnostic accuracy of general practitioners has been reported to be lower than the accuracy of artificial intelligence software. This article describes the Skincare project (H2020, EIT Digital). Contributions include enabling technology for clinical decision support based on interactive machine learning (IML), a reference architecture towards a Digital European Healthcare Infrastructure (also cf. EIT MCPS), technical components for aggregating digitised patient information, and the integration of decision support technology into clinical test-bed environments. However, the main contribution is a diagnostic and decision support system in dermatology for patients and doctors, an interactive deep learning system for differential diagnosis of malignant skin lesions. In this article, we describe its functionalities and the user interfaces to facilitate machine learning from human input. The baseline deep learning system, which delivers state-of-the-art results and the potential to augment general practitioners and even dermatologists, was developed and validated using de-identified cases from a dermatology image database (ISIC), which has about 20000 cases for development and validation, provided by board-certified dermatologists defining the reference standard for every case. ISIC allows for differential diagnosis, a ranked list of eight diagnoses, that is used to plan treatments in the common setting of diagnostic ambiguity. We give an overall description of the outcome of the Skincare project, and we focus on the steps to support communication and coordination between humans and machine in IML. This is an integral part of the development of future cognitive assistants in the medical domain, and we describe the necessary intelligent user interfaces.},
month = {5},
year = {2020},
title = {The Skincare project, an interactive deep learning system for differential diagnosis of malignant skin lesions.},
type = {Technical Report},
volume = {1},
address = {Bundesministerium für Bildung und Forschung Kapelle-Ufer 1 D-10117 Berlin},
institution = {BMBF, H2020},
author = {Daniel Sonntag and Fabrizio Nunnari and Hans-Jürgen Profitlich},
keywords = {Machine learning, decision support, dermatology, skin cancer},
url = {https://arxiv.org/abs/2005.09448 https://www.dfki.de/fileadmin/user_upload/import/10912_main2.pdf}
}
F. Nunnari, C. Bhuvaneshwara, A. O. Ezema, and D. Sonntag, "A Study on the Fusion of Pixels and Patient Metadata in CNN-Based Classification of Skin Lesion Images" in Proc. Machine Learning and Knowledge Extraction. International IFIP Cross Domain (CD) Conference for Machine Learning & Knowledge Extraction (MAKE) (CD-MAKE-2020), August 25-28, Dublin, Ireland, 2020.
doi: 10.1007/978-3-030-57321-8_11
@inproceedings{pub11113, abstract = {We present a study on the fusion of pixel data and patient metadata (age, gender, and body location) for improving the classification of skin lesion images. The experiments have been conducted with the ISIC 2019 skin lesion classification challenge data set. Taking two plain convolutional neural networks (CNNs) as a baseline, metadata are merged using either non-neural machine learning methods (tree-based and support vector machines) or shallow neural networks. Results show that shallow neural networks outperform other approaches in all overall evaluation measures. However, despite the increase in the classification accuracy (up to +19.1%), interestingly, the average per-class sensitivity decreases in three out of four cases for CNNs, thus suggesting that using metadata penalizes the prediction accuracy for lower represented classes. A study on the patient metadata shows that age is the most useful metadatum as a decision criterion, followed by body location and gender.},
year = {2020},
title = {A Study on the Fusion of Pixels and Patient Metadata in CNN-Based Classification of Skin Lesion Images},
booktitle = {Machine Learning and Knowledge Extraction. International IFIP Cross Domain (CD) Conference for Machine Learning & Knowledge Extraction (MAKE) (CD-MAKE-2020), August 25-28, Dublin, Ireland},
editor = {Andreas Holzinger and Peter Kieseberg and A Min Tjoa and Edgar Weippl},
pages = {191-208},
isbn = {978-3-030-57321-8},
publisher = {Springer International Publishing},
doi = {10.1007/978-3-030-57321-8_11},
author = {Fabrizio Nunnari and Chirag Bhuvaneshwara and Abraham Obinwanne Ezema and Daniel Sonntag},
url = {https://link.springer.com/chapter/10.1007/978-3-030-57321-8_11 https://www.dfki.de/fileadmin/user_upload/import/11113_Nunnari20CD-MAKE.pdf}
}
H. M. D. Nguyen, A. Ezema, F. Nunnari, and D. Sonntag, "A Visually Explainable Learning System for Skin Lesion Detection Using Multiscale Input with Attention U-Net" in Proc. KI 2020: Advances in Artificial Intelligence. German Conference on Artificial Intelligence (KI-2020), 43rd, September 21-25, Bamberg, Germany, 2020.
@inproceedings{pub11178, series = {Lecture Notes in Computer Science, LNCS},
abstract = {In this work, we propose a new approach to automatically predict the locations of visual dermoscopic attributes for Task 2 of the ISIC 2018 Challenge. Our method is based on the Attention U-Net with multi-scale images as input. We apply a new strategy based on transfer learning, i.e., training the deep network for feature extraction by adapting the weights of the network trained for segmentation. Our tests show that, first, the proposed algorithm is on par with or outperforms the best ISIC 2018 architectures (LeHealth and NMN) in the extraction of two visual features. Secondly, it uses only 1/30 of the training parameters; we observed less computation and memory requirements, which are particularly useful for future implementations on mobile devices. Finally, our approach generates visually explainable behaviour with uncertainty estimations to help doctors in diagnosis and treatment decisions.},
month = {9},
year = {2020},
title = {A Visually Explainable Learning System for Skin Lesion Detection Using Multiscale Input with Attention U-Net},
booktitle = {KI 2020: Advances in Artificial Intelligence. German Conference on Artificial Intelligence (KI-2020), 43rd, September 21-25, Bamberg, Germany},
volume = {12325},
pages = {313-319},
publisher = {Springer},
author = {Ho Minh Duy Nguyen and Abraham Ezema and Fabrizio Nunnari and Daniel Sonntag},
keywords = {Skin lesion, Diagnose features, Attention U-Net},
url = {https://link.springer.com/chapter/10.1007/978-3-030-58285-2_28 https://www.dfki.de/fileadmin/user_upload/import/11178_KI_2020.pdf}
}
M. Kalimuthu, F. Nunnari, and D. Sonntag, "A Competitive Deep Neural Network Approach for the ImageCLEFmed Caption 2020 Task" German Research Center for Artificial Intelligence, 2020.
@techreport{pub11188, series = {DFKI Documents, D},
abstract = {The aim of the ImageCLEFmed Caption task is to develop a system that automatically labels radiology images with relevant medical concepts. We describe our Deep Neural Network (DNN) based approach for tackling this problem. On the challenge test set of 3,534 radiology images, our system achieves an F1 score of 0.375 and ranks 12th among all systems that were successfully submitted to the challenge, whereby we rely only on the provided data sources and do not use any external medical knowledge or ontologies, or pretrained models from other medical image repositories or application domains.},
year = {2020},
title = {A Competitive Deep Neural Network Approach for the ImageCLEFmed Caption 2020 Task},
volume = {o.A.},
institution = {German Research Center for Artificial Intelligence},
author = {Marimuthu Kalimuthu and Fabrizio Nunnari and Daniel Sonntag},
url = {https://arxiv.org/pdf/2007.14226v1 https://www.dfki.de/fileadmin/user_upload/import/11188_A_Competitive_Deep_Neural_Network_Approach.pdf}
}
R. Biswas, M. Barz, and D. Sonntag, "Towards Explanatory Interactive Image Captioning Using Top-Down and Bottom-Up Features, Beam Search and Re-ranking" KI - Künstliche Intelligenz, German Journal on Artificial Intelligence - Organ des Fachbereiches "Künstliche Intelligenz" der Gesellschaft für Informatik e.V. (KI), vol. 36.
2020.
doi: 10.1007/s13218-020-00679-2
@article{pub11236, abstract = {Image captioning is a challenging multimodal task. Significant improvements could be obtained by deep learning. Yet, captions generated by humans are still considered better, which makes it an interesting application for interactive machine learning and explainable artificial intelligence methods. In this work, we aim at improving the performance and explainability of the state-of-the-art method Show, Attend and Tell by augmenting their attention mechanism using additional bottom-up features. We compute visual attention on the joint embedding space formed by the union of high-level features and the low-level features obtained from the object specific salient regions of the input image. We embed the content of bounding boxes from a pre-trained Mask R-CNN model. This delivers state-of-the-art performance, while it provides explanatory features. Further, we discuss how interactive model improvement can be realized through re-ranking caption candidates using beam search decoders and explanatory features. We show that interactive re-ranking of beam search candidates has the potential to outperform the state-of-the-art in image captioning.},
month = {7},
year = {2020},
title = {Towards Explanatory Interactive Image Captioning Using Top-Down and Bottom-Up Features, Beam Search and Re-ranking},
journal = {KI - Künstliche Intelligenz, German Journal on Artificial Intelligence - Organ des Fachbereiches "Künstliche Intelligenz" der Gesellschaft für Informatik e.V. (KI)},
volume = {36},
pages = {1-14},
publisher = {Springer},
doi = {10.1007/s13218-020-00679-2},
author = {Rajarshi Biswas and Michael Barz and Daniel Sonntag},
keywords = {Image Captioning, Deep Learning, Explainable AI, Visual Explanations, Interactive Machine Learning, Beam Search, Re-ranking},
url = {https://doi.org/10.1007/s13218-020-00679-2 https://www.dfki.de/fileadmin/user_upload/import/11236_2021_TOWARDS_EXPLANATORY_INTERACTIVE_IMAGE_CAPTIONING_USING_TOP-DOWN_AND_BOTTOM-UP_FEATURES,_BEAM_SEARCH_AND_RE-RANKING.pdf}
}
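The interactive re-ranking of beam search candidates discussed in the abstract can be illustrated with a hypothetical scoring function that mixes the decoder log-probability with an explanatory feature score; the weighting scheme and field names are assumptions for illustration, not the authors' method.

def rerank_candidates(candidates, alpha=0.7):
    """Re-rank beam search caption candidates by a convex combination of
    the decoder log-probability and an external relevance score (e.g.,
    overlap with detected object labels)."""
    def combined(c):
        return alpha * c["log_prob"] + (1 - alpha) * c["feature_score"]
    return sorted(candidates, key=combined, reverse=True)

beam = [
    {"caption": "a dog runs on grass", "log_prob": -4.1, "feature_score": 0.9},
    {"caption": "an animal outdoors",  "log_prob": -3.8, "feature_score": 0.4},
]
print(rerank_candidates(beam)[0]["caption"])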
F. Nunnari, A. Ezema, and D. Sonntag, "The effects of masking in melanoma image classification with CNNs towards international standards for image preprocessing" in Proc. 2020 EAI International Symposium on Medical Artificial Intelligence. EAI International Symposium on Medical Artificial Intelligence (MedAI-2020), December 18, Online-Conference, 2020.
@inproceedings{pub11368, abstract = {The classification of skin lesion images is known to be biased by artifacts of the surrounding skin, but it is still not clear to what extent masking out healthy skin pixels influences classification performances, and why. To better understand this phenomenon, we apply different strategies of image masking (rectangular masks, circular masks, full masking, and image cropping) to three datasets of skin lesion images (ISIC2016, ISIC2018, and MedNode). We train CNN-based classifiers, provide performance metrics through a 10-fold cross-validation, and analyse the behaviour of Grad-CAM saliency maps through an automated visual inspection. Our experiments show that cropping is the best strategy to maintain classification performance and to significantly reduce training times as well. Our analysis through visual inspection shows that CNNs have the tendency to focus on pixels of healthy skin when no malignant features can be identified. This suggests that CNNs have the tendency of "eagerly" looking for pixel areas to justify a classification choice, potentially leading to biased discriminators. To mitigate this effect, and to standardize image preprocessing, we suggest cropping images during dataset construction or before the learning step.},
month = {12},
year = {2020},
title = {The effects of masking in melanoma image classification with CNNs towards international standards for image preprocessing},
booktitle = {2020 EAI International Symposium on Medical Artificial Intelligence. EAI International Symposium on Medical Artificial Intelligence (MedAI-2020), December 18, Online-Conference},
note = {Virtual Conference},
publisher = {EAI},
author = {Fabrizio Nunnari and Abraham Ezema and Daniel Sonntag},
url = {https://www.dfki.de/fileadmin/user_upload/import/11368_2020_EAI_MedAI_StudyOnDatasetBias.pdf https://link.springer.com/chapter/10.1007/978-3-030-70569-5_16}
}
A. Heimann-Steinert, A. Latendorf, A. Prange, D. Sonntag, and U. Müller-Werdan, "Digital pen technology for conducting cognitive assessments: a cross-over study with older adults" Psychological Research, vol. 85.
2020.
@article{pub11374, abstract = {Many digitalized cognitive assessments exist to increase reliability, standardization, and objectivity. Particularly for older adults, performing digitized cognitive assessments can lead to poorer test results if the test takers are unfamiliar with the computer, mouse, keyboard, or touch screen. In a cross-over design study, 40 older adults (age M = 74.4 ± 4.1 years) conducted the Trail Making Test A and B with a digital pen (digital pen tests, DPT) and a regular pencil (pencil tests, PT) to identify differences in performance. Furthermore, the tests conducted with a digital pen were analyzed manually (manual results, MR) and electronically (electronic results, ER) by an automated system algorithm to determine the possibilities of digital pen evaluation. ICC(2,k) showed a good level of agreement for TMT A (ICC(2,k) = 0.668) and TMT B (ICC(2,k) = 0.734) between PT and DPT. When comparing MR and ER, ICC(2,k) showed an excellent level of agreement in TMT A (ICC(2,k) = 0.999) and TMT B (ICC(2,k) = 0.994). The frequency of pen lifting correlates significantly with the execution time in TMT A (r = 0.372, p = 0.030) and TMT B (r = 0.567, p < 0.001). A digital pen can be used to perform the Trail Making Test, as it has been shown that there is no difference in the results due to the type of pen used. With a digital pen, the advantages of digitized testing can be used without having to accept the disadvantages.},
year = {2020},
title = {Digital pen technology for conducting cognitive assessments: a cross-over study with older adults},
journal = {Psychological Research},
volume = {85},
pages = {1-9},
publisher = {Springer},
author = {A. Heimann-Steinert and A. Latendorf and Alexander Prange and Daniel Sonntag and U. Müller-Werdan},
keywords = {digital pen, machine learning, medicine, dementia, cognitive assessments},
url = {https://link.springer.com/article/10.1007/s00426-020-01452-8#citeas https://www.dfki.de/fileadmin/user_upload/import/11374_Heimann-Steinert-2020-DigitalPenTechnologyForConduct.pdf}
}
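Agreement statistics such as the reported ICC(2,k) can be computed from long-format data, for example with the pingouin library; the toy data below are invented for illustration, and the library choice is an assumption, not necessarily what the authors used.

import pandas as pd
import pingouin as pg

# long-format agreement data: each subject rated once per method (toy data)
df = pd.DataFrame({
    "subject": [1, 1, 2, 2, 3, 3],
    "method":  ["pencil", "digital_pen"] * 3,
    "seconds": [38.0, 41.0, 55.0, 52.0, 47.0, 49.0],
})
icc = pg.intraclass_corr(data=df, targets="subject",
                         raters="method", ratings="seconds")
print(icc[icc["Type"] == "ICC2k"])  # two-way random effects, average measures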
J. Müller, M. Sprenger, T. Franke, P. Lukowicz, C. Reidick, and M. Herrlich, "Game of TUK: deploying a large-scale activity-boosting gamification project in a university context" in Proc. Mensch und Computer. Mensch und Computer (MuC-2020), 2020.
@inproceedings{pub12112, abstract = {We present Game of TUK, a gamified mobile app to increase physical activity among students at TU Kaiserslautern. The scale of our project with almost 2,000 players over the course of four weeks is unique for a project in a university context. We present feedback we received and share our insights. Our results show that location-based activities in particular were very popular. In contrast, mini-games included in the app did not contribute as much to user activity as expected.},
year = {2020},
title = {Game of TUK: deploying a large-scale activity-boosting gamification project in a university context},
booktitle = {Mensch und Computer. Mensch und Computer (MuC-2020)},
publisher = {ACM},
author = {Julia Müller and Max Sprenger and Tobias Franke and Paul Lukowicz and Claudia Reidick and Marc Herrlich},
url = {https://dl.acm.org/doi/abs/10.1145/3404983.3410008 https://www.dfki.de/fileadmin/user_upload/import/12112_2020_GAME_OF_TUK-_DEPLOYING_A_LARGE-SCALE_ACTIVITY-BOOSTING_GAMIFICATION_PROJECT_IN_A_UNIVERSITY_CONTEXT.pdf}
}
D. Sonntag, "Medical and Health Systems." Association for Computing Machinery and Morgan & Claypool.
@inbook{pub10812, abstract = {In this chapter, we discuss the trends of multimodal-multisensor interfaces for medical and health systems. We emphasize the theoretical foundations of multimodal interfaces and systems in the healthcare domain. We aim to provide a basis for motivating and accelerating future interfaces for medical and health systems. Therefore, we provide many examples of existing and futuristic systems. For each of these systems, we define a classification into clinical systems and non-clinical systems, as well as sub-classes of multimodal and multisensor interfaces, to help structure the recent work in this emerging research field of medical and health systems.},
year = {2019},
title = {Medical and Health Systems},
booktitle = {Sharon Oviatt; Björn W Schuller; Philip R Cohen; Daniel Sonntag; Gerasimos Potamianos; Antonio Krüger;: The Handbook of Multimodal-Multisensor Interfaces: Language Processing, Software, Commercialization, and Emerging Directions - Volume 3},
pages = {423-476},
isbn = {9781970001754},
publisher = {Association for Computing Machinery and Morgan & Claypool},
author = {Daniel Sonntag},
url = {https://doi.org/10.1145/3233795.3233808 https://www.dfki.de/fileadmin/user_upload/import/10812_Medical-and-Health-Systems.pdf}
}
D. Sonntag, "Künstliche Intelligenz in der Medizin -- Holzweg oder Heilversprechen?" HNO, vol. 67, iss. 5.
2019.
doi: 10.1007/s00106-019-0665-z
@article{pub10833, abstract = {Artificial intelligence (AI) has reached a new stage of maturity in recent years and is becoming the driver of digitalisation in all areas of life. AI is a cross-sectional technology of great importance for all areas of medicine that involve image data, text data, and biodata. There is no medical field that will not be affected by AI, and clinical decision support plays an important role here. AI methods are becoming established particularly in medical workflow management and in predicting treatment success and outcome. In image-based diagnosis and patient management, AI systems can already provide support, but they cannot propose critical decisions. Prevention and therapy measures can be assessed more soundly with AI support, but the coverage of diseases is still far too low to build robust systems for everyday clinical use. Widespread adoption requires continuing education for physicians so that they can decide when automatic decision support can be trusted.},
number = {5},
year = {2019},
title = {Künstliche Intelligenz in der Medizin -- Holzweg oder Heilversprechen?},
journal = {HNO},
volume = {67},
pages = {343-349},
publisher = {Springer},
doi = {10.1007/s00106-019-0665-z},
author = {Daniel Sonntag},
url = {https://doi.org/10.1007/s00106-019-0665-z https://www.dfki.de/fileadmin/user_upload/import/10833_sonntag-hno-ki-DFKI-repository.pdf}
}
M. Barz and D. Sonntag, "Incremental Improvement of a Question Answering System by Re-ranking Answer Candidates using Machine Learning" Computing Research Repository eprint Journal (CoRR), vol. abs/1908.10149.
2019.
@article{pub10895, abstract = {We implement a method for re-ranking top-10 results of a state-of-the-art question answering (QA) system. The goal of our re-ranking approach is to improve the answer selection given the user question and the top-10 candidates. We focus on improving deployed QA systems that do not allow re-training or for which re-training comes at a high cost. Our re-ranking approach learns a similarity function using n-gram based features using the query, the answer and the initial system confidence as input. Our contributions are: (1) we generate a QA training corpus starting from 877 answers from the customer care domain of T-Mobile Austria, (2) we implement a state-of-the-art QA pipeline using neural sentence embeddings that encode queries in the same space as the answer index, and (3) we evaluate the QA pipeline and our re-ranking approach using a separately provided test set. The test set can be considered to be available after deployment of the system, e.g., based on feedback of users. Our results show that the system performance, in terms of top-n accuracy and the mean reciprocal rank, benefits from re-ranking using gradient boosted regression trees. On average, the mean reciprocal rank improves by 9.15%.},
month = {8},
year = {2019},
title = {Incremental Improvement of a Question Answering System by Re-ranking Answer Candidates using Machine Learning},
journal = {Computing Research Repository eprint Journal (CoRR)},
volume = {abs/1908.10149},
pages = {1-13},
publisher = {arXiv},
author = {Michael Barz and Daniel Sonntag},
url = {https://arxiv.org/abs/1908.10149 https://www.dfki.de/fileadmin/user_upload/import/10895_1908.10149.pdf}
}
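A sketch of the described re-ranking approach: n-gram overlap between query and answer candidate plus the initial system confidence serve as features for gradient boosted regression trees (here scikit-learn). The two training pairs are invented placeholders; a real corpus would contain many labeled query-candidate pairs.

import numpy as np
from sklearn.ensemble import GradientBoostingRegressor

def ngram_overlap(query: str, answer: str, n: int = 2) -> float:
    """Fraction of query n-grams that also occur in the answer."""
    grams = lambda s: {tuple(s.split()[i:i + n])
                       for i in range(len(s.split()) - n + 1)}
    q, a = grams(query), grams(answer)
    return len(q & a) / len(q) if q else 0.0

def features(query, candidate):
    # unigram/bigram overlap plus the initial system confidence
    return [ngram_overlap(query, candidate["text"], 1),
            ngram_overlap(query, candidate["text"], 2),
            candidate["confidence"]]

# placeholder training pairs: (query, candidate, relevance label)
train_pairs = [
    ("how to reset my router", {"text": "restart the router first", "confidence": 0.8}, 1),
    ("how to reset my router", {"text": "your bill is available online", "confidence": 0.3}, 0),
]
X = np.array([features(q, c) for q, c, _ in train_pairs])
y = np.array([label for _, _, label in train_pairs])
ranker = GradientBoostingRegressor().fit(X, y)
# at query time: score the top-10 candidates and sort them descending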
H. Profitlich and D. Sonntag, "Interactivity and Transparency in Medical Risk Assessment with Supersparse Linear Integer Models" BMBF, 2019.
@techreport{pub11177, series = {DFKI Research Reports, RR},
abstract = {Scoring systems are linear classification models that only require users to add or subtract a few small numbers in order to make a prediction. They are used for example by clinicians to assess the risk of medical conditions. This work focuses on our approach to implement an intuitive user interface to allow a clinician to generate such scoring systems interactively, based on the RiskSLIM machine learning library. We describe the technical architecture which allows a medical professional who is not specialised in developing and applying machine learning algorithms to create competitive transparent supersparse linear integer models in an interactive way. We demonstrate our prototype machine learning system in the nephrology domain, where doctors can interactively sub-select datasets to compute models, explore scoring tables that correspond to the learned models, and check the quality of the transparent solutions from a medical perspective.},
year = {2019},
title = {Interactivity and Transparency in Medical Risk Assessment with Supersparse Linear Integer Models},
journal = {CoRR},
volume = {abs/1911.12119},
institution = {BMBF},
publisher = {arXiv},
author = {Hans-Jürgen Profitlich and Daniel Sonntag},
url = {http://arxiv.org/abs/1911.12119 https://www.dfki.de/fileadmin/user_upload/import/11177_integer.pdf}
}
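The kind of scoring system pub11177 lets clinicians build can be stated in a few lines. The sketch below shows only how a learned scoring table is applied; the features, point values, and intercept are invented placeholders, not a RiskSLIM-trained or clinically validated model.

import math

# A supersparse linear integer model is a handful of small integer points
# attached to binary patient features (hypothetical example features).
SCORING_TABLE = {
    "age_over_60": 2,
    "diabetes": 1,
    "prior_transplant": 3,
    "normal_creatinine": -2,
}
INTERCEPT = -4  # hypothetical

def risk(patient):
    # Sum the points for the features the patient has, then map the
    # integer score to a probability with the logistic function.
    score = INTERCEPT + sum(pts for feat, pts in SCORING_TABLE.items()
                            if patient.get(feat))
    return 1.0 / (1.0 + math.exp(-score))

print(risk({"age_over_60": True, "diabetes": True}))  # score -1 -> ~0.27

The transparency claim rests exactly on this form: a clinician can recompute the score by hand, which is why the interactive tool exposes the scoring table rather than the optimiser.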
S. Stauden, M. Barz, and D. Sonntag, "Visual Search Target Inference Using Bag of Deep Visual Words" in Proc. KI 2018: Advances in Artificial Intelligence - 41st German Conference on AI. German Conference on Artificial Intelligence (KI-2018), September 24-28, Berlin, Germany, 2018.
doi: 10.1007/978-3-030-00111-7_25
@inproceedings{pub10896, abstract = {Visual search target inference subsumes methods for predicting, via eye tracking, the object a person is searching for. A person intends to find an object in a visual scene, and we predict this target based on the fixation behavior. Knowing the search target can improve intelligent user interaction. In this work, we implement a new feature encoding, the Bag of Deep Visual Words, for search target inference using a pre-trained convolutional neural network (CNN). Our work is based on a recent approach from the literature that uses Bag of Visual Words, common in computer vision applications. We evaluate our method using a gold standard dataset. The results show that our new feature encoding outperforms the baseline from the literature, in particular when excluding fixations on the target.},
month = {8},
year = {2018},
title = {Visual Search Target Inference Using Bag of Deep Visual Words},
booktitle = {KI 2018: Advances in Artificial Intelligence - 41st German Conference on AI. German Conference on Artificial Intelligence (KI-2018), September 24-28, Berlin, Germany},
editor = {Frank Trollmann and Anni-Yasmin Turhan},
publisher = {Springer},
doi = {10.1007/978-3-030-00111-7_25},
author = {Sven Stauden and Michael Barz and Daniel Sonntag},
url = {https://www.dfki.de/fileadmin/user_upload/import/10896_2018_Visual_Search_Target_Inference_Using_Bag_of_Deep_Visual_Words.pdf https://link.springer.com/content/pdf/10.1007/978-3-030-00111-7_25.pdf}
}
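A condensed sketch of the Bag of Deep Visual Words encoding from pub10896, under stated assumptions: the pre-trained CNN is stubbed by a fixed random projection so the sketch is self-contained, and the codebook size and patch dimensions are placeholders.

import numpy as np
from sklearn.cluster import KMeans

# Stand-in for a pre-trained CNN descriptor (e.g. a pooled conv layer);
# a fixed random projection keeps the example runnable end to end.
_PROJ = np.random.default_rng(0).standard_normal((64 * 64 * 3, 128))

def cnn_features(patch):
    return patch.reshape(-1) @ _PROJ

# 1) Learn a codebook of "deep visual words" from patches around fixations.
train_patches = [np.random.rand(64, 64, 3) for _ in range(200)]
codebook = KMeans(n_clusters=16, n_init=10).fit(
    np.stack([cnn_features(p) for p in train_patches]))

def encode_scanpath(fixation_patches):
    # 2) Encode one trial as a normalised histogram of visual words;
    # this vector is the input to a search-target classifier.
    words = codebook.predict(np.stack([cnn_features(p) for p in fixation_patches]))
    hist = np.bincount(words, minlength=codebook.n_clusters).astype(float)
    return hist / hist.sum()

Replacing hand-crafted descriptors with CNN activations is the paper's contribution; the codebook-and-histogram machinery is the classical Bag of Visual Words pipeline.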
D. Sonntag und H. Profitlich, "An architecture of open-source tools to combine textual information extraction, faceted search and information visualisation" Computing Research Repository eprint Journal (CoRR), vol. abs/1810.12627.
2018.
@article{pub11491, abstract = {This article presents our steps to integrate complex and partly unstructured medical data into a clinical research database with subsequent decision support. Our main application is an integrated faceted search tool, accompanied by the visualisation of results of automatic information extraction from textual documents. We describe the details of our technical architecture (open-source tools), to be replicated at other universities, research institutes, or hospitals. Our exemplary use cases are nephrology and mammography. The software was first developed in the nephrology domain and then adapted to the mammography use case. We report on these case studies, illustrating how the application can be used by a clinician and which questions can be answered. We show that our architecture and the employed software modules are suitable for both areas of application with a limited amount of adaptation. For example, in nephrology we try to answer questions about the temporal characteristics of event sequences to gain significant insight from the data for cohort selection. We present a versatile time-line tool that enables the user to explore relations between a multitude of diagnoses and laboratory values.},
year = {2018},
title = {An architecture of open-source tools to combine textual information extraction, faceted search and information visualisation},
journal = {Computing Research Repository eprint Journal (CoRR)},
volume = {abs/1810.12627},
pages = {13-28},
publisher = {Elsevier},
author = {Daniel Sonntag and Hans-Jürgen Profitlich},
keywords = {clinical decision support; information extraction; natural language processing; medical data analysis; data management; faceted search; human-computer interaction; visualisation; electronic health record;},
url = {http://arxiv.org/abs/1810.12627 https://www.dfki.de/fileadmin/user_upload/import/11491_1810.12627.pdf}
}
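The faceted-search interaction that pub11491 builds from open-source components reduces to a simple pattern. This sketch uses in-memory dictionaries with invented record fields; the real architecture sits on a search engine over extracted clinical data.

from collections import Counter

records = [
    {"diagnosis": "IgA nephropathy", "year": 2015, "creatinine": "elevated"},
    {"diagnosis": "IgA nephropathy", "year": 2016, "creatinine": "normal"},
    {"diagnosis": "lupus nephritis", "year": 2016, "creatinine": "elevated"},
]

def facet_counts(docs, field):
    # The counts shown next to each facet value in the UI.
    return Counter(d[field] for d in docs)

def drill_down(docs, **filters):
    # Narrow the cohort by the facet values a clinician clicks.
    return [d for d in docs
            if all(d.get(f) == v for f, v in filters.items())]

cohort = drill_down(records, year=2016)
print(facet_counts(cohort, "diagnosis"))
# Counter({'IgA nephropathy': 1, 'lupus nephritis': 1})

Each drill-down step recomputes the facet counts on the narrowed cohort, which is what makes the exploration interactive.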
A. Prange, D. Schmidt, and D. Sonntag, "A Digital Pen Based Tool for Instant Digitisation and Digitalisation of Biopsy Protocols" in Proc. 2017 IEEE 30th International Symposium on Computer-Based Medical Systems (CBMS). IEEE International Symposium on Computer-Based Medical Systems (CBMS-2017), 2017.
doi: 10.1109/CBMS.2017.132
@inproceedings{pub11235, abstract = {In order to improve medical processes in nephrology, we present an application that allows doctors to create biopsy protocols by using a digital pen on a tablet. The biopsy protocol app is seamlessly integrated into the existing infrastructure at the hospital (see figure 1). Compared to other reporting tools, we provide (1) real-time hand-writing/gesture recognition and real-time feedback on the recognition results on the screen; (2) a real-time digitisation into structured data and PDF documents; and (3) the mapping of the transcribed contents into concepts of the Banff classification. Our approach combines the benefits of paper with the automatic digitisation and digitalisation of hand-written user input. A fully digital and mobile approach should empower nephrologists to produce high quality data more effectively and in real-time so that it can be directly used in hospital processes.},
month = {6},
year = {2017},
title = {A Digital Pen Based Tool for Instant Digitisation and Digitalisation of Biopsy Protocols},
booktitle = {2017 IEEE 30th International Symposium on Computer-Based Medical Systems (CBMS). IEEE International Symposium on Computer-Based Medical Systems (CBMS-2017)},
pages = {773-774},
publisher = {IEEE Xplore},
doi = {10.1109/CBMS.2017.132},
author = {Alexander Prange and Danilo Schmidt and Daniel Sonntag},
keywords = {gesture recognition, health care, interactive devices, medical computing, medical information systems, mobile computing, mobile approach, hand-written user input, automatic digitisation, real-time digitisation, real-time feedback, real-time hand-writing/gesture recognition}
}
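The final step in pub11235, mapping the transcribed pen input to Banff classification concepts, can be pictured as follows. The lexicon entries and the score pattern are illustrative assumptions, not the project's actual ontology mapping.

import re

BANFF_LEXICON = {  # hypothetical surface forms -> Banff lesion codes
    "tubulitis": "t",
    "glomerulitis": "g",
    "interstitial inflammation": "i",
}

def map_to_banff(transcript):
    # Extract (lesion code, grade) pairs such as "tubulitis 2" -> ("t", 2)
    # from the handwriting recogniser's transcript.
    findings = []
    for term, code in BANFF_LEXICON.items():
        for m in re.finditer(rf"{term}\s*(\d)", transcript.lower()):
            findings.append((code, int(m.group(1))))
    return findings

print(map_to_banff("Mild Tubulitis 1, glomerulitis 2 present"))
# [('t', 1), ('g', 2)]

Structured output like this is what lets the protocol feed hospital processes directly instead of remaining a scanned document.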
D. Sonntag, Interakt - A Multimodal Multisensory Interactive Cognitive Assessment Tool, vol. abs/1709.01796, 2017.
@misc{pub11489, abstract = {Cognitive assistance may be valuable in applications for doctors and therapists that reduce costs and improve quality in healthcare systems. Use cases and scenarios include the assessment of dementia. In this paper, we present our approach to the (semi-)automatic assessment of dementia.},
year = {2017},
title = {Interakt - A Multimodal Multisensory Interactive Cognitive Assessment Tool},
volume = {abs/1709.01796},
pages = {4},
author = {Daniel Sonntag},
url = {http://arxiv.org/abs/1709.01796 https://www.dfki.de/fileadmin/user_upload/import/11489_1709.01796.pdf}
}
D. Sonntag und H. Profitlich, "Integrated Decision Support by Combining Textual Information Extraction, Facetted Search and Information Visualisation" in Proc. 2017 IEEE 30th International Symposium on Computer-Based Medical Systems (CBMS). IEEE International Symposium on Computer-Based Medical Systems (CBMS-2017), June 22-24, Thessaloniki, Greece, 2017.
doi: 10.1109/CBMS.2017.119
@inproceedings{pub11492, abstract = {This work focuses on our steps to integrate complex and partly unstructured medical data into a clinical research database with subsequent decision support. Our main application is an integrated facetted search tool, followed by information visualisation based on automatic information extraction results from textual documents. We describe the details of our technical architecture (open-source tools), to be replicated at other universities, research institutes, or hospitals. Our exemplary use case is nephrology, where we try to answer questions about the temporal characteristics of event sequences and gain significant insight from the data for cohort selection. We report on this case study, illustrating how the application can be used by a clinician and which questions can be answered.},
year = {2017},
title = {Integrated Decision Support by Combining Textual Information Extraction, Facetted Search and Information Visualisation},
booktitle = {2017 IEEE 30th International Symposium on Computer-Based Medical Systems (CBMS). IEEE International Symposium on Computer-Based Medical Systems (CBMS-2017), June 22-24, Thessaloniki, Greece},
pages = {95-100},
publisher = {IEEE},
doi = {10.1109/CBMS.2017.119},
author = {Daniel Sonntag and Hans-Jürgen Profitlich},
url = {https://ieeexplore.ieee.org/document/8104164}
}
D. Schmidt, K. Budde, D. Sonntag, H. Profitlich, M. Ihle, and O. Staeck, "A novel tool for the identification of correlations in medical data by faceted search" Computers in Biology and Medicine - An International Journal, vol. 85, 2017.
doi: 10.1016/j.compbiomed.2017.04.011
@article{pub11493, abstract = {This work focuses on the integration of multifaceted extensive data sets (e.g. laboratory values, vital data, medications) and partly unstructured medical data such as discharge letters, diagnostic reports, clinical notes etc. in a research database. Our main application is an integrated faceted search in nephrology based on information extraction results. We describe the details of the application in transplant medicine and the resulting technical architecture of the faceted search application.},
year = {2017},
title = {A novel tool for the identification of correlations in medical data by faceted search},
journal = {Computers in Biology and Medicine - An International Journal},
volume = {85},
pages = {98-105},
publisher = {Elsevier},
doi = {10.1016/j.compbiomed.2017.04.011},
author = {Danilo Schmidt and Klemens Budde and Daniel Sonntag and Hans-Jürgen Profitlich and Matthias Ihle and Oliver Staeck},
keywords = {Faceted search, Knowledge based systems, Medical domain, Decision support},
url = {https://www.sciencedirect.com/science/article/pii/S0010482517300975}
}
A. Prange und D. Sonntag, "Digital PI-RADS: Smartphone Sketches for Instant Knowledge Acquisition in Prostate Cancer Detection" in Proc. 2016 IEEE 29th International Symposium on Computer-Based Medical Systems (CBMS). IEEE International Symposium on Computer-Based Medical Systems (CBMS-2016), 2016.
doi: 10.1109/CBMS.2016.23
@inproceedings{pub11234, abstract = {In order to improve reporting practices for the detection of prostate cancer, we present an application that allows urologists to create structured reports by using a digital pen on a smartphone. In this domain, printed documents cannot be easily replaced by computer systems because they contain free-form sketches and textual annotations, and the acceptance of traditional PC reporting tools is rather low among the doctors. Our approach provides an instant knowledge acquisition system by automatically interpreting the written strokes, texts, and sketches. We have incorporated this structured reporting system for MRI of the prostate (PI-RADS). Our system imposes only minimal overhead on traditional form-filling processes and provides for a direct, ontology-based structuring of the user input for semantic search and retrieval applications.},
month = {6},
year = {2016},
title = {Digital PI-RADS: Smartphone Sketches for Instant Knowledge Acquisition in Prostate Cancer Detection},
booktitle = {2016 IEEE 29th International Symposium on Computer-Based Medical Systems (CBMS). IEEE International Symposium on Computer-Based Medical Systems (CBMS-2016)},
pages = {13-18},
publisher = {IEEE Xplore},
doi = {10.1109/CBMS.2016.23},
author = {Alexander Prange and Daniel Sonntag},
keywords = {biomedical MRI, cancer, information retrieval, knowledge acquisition, medical image processing, ontologies (artificial intelligence), smart phones, digital PI-RADS, smartphone, prostate cancer detection, structured reports, digital pen, printed documents,}
}
D. Sonntag, V. Tresp, S. Zillner, A. Cavallaro, M. Hammon, A. Reis, P. A. Fasching, M. Sedlmayr, T. Ganslandt, H. Prokosch, K. Budde, D. Schmidt, C. Hinrichs, T. Wittenberg, P. Daumke, and P. G. Oppelt, "The Clinical Data Intelligence Project - A smart data initiative" Informatik Spektrum, vol. 39, iss. 4, 2016.
doi: 10.1007/s00287-015-0913-x
@article{pub11490, abstract = {This article is about a new project that combines clinical data intelligence and smart data. It provides an introduction to the “Klinische Datenintelligenz” (KDI) project, which is funded by the Federal Ministry for Economic Affairs and Energy (BMWi); we transfer research and development (R&D) results from the analysis of data generated in clinical routine in specific medical domains. We present the project structure and goals, how patient care should be improved, and the joint efforts of data and knowledge engineering, information extraction (from textual and other unstructured data), statistical machine learning, decision support, and their integration into special use cases moving towards individualised medicine. In particular, we describe some details of our medical use cases and our cooperation with two major German university hospitals.},
number = {4},
year = {2016},
title = {The Clinical Data Intelligence Project - A smart data initiative},
journal = {Informatik Spektrum},
volume = {39},
pages = {290-300},
publisher = {Springer},
doi = {10.1007/s00287-015-0913-x},
author = {Daniel Sonntag and Volker Tresp and Sonja Zillner and Alexander Cavallaro and Matthias Hammon and André Reis and Peter A. Fasching and Martin Sedlmayr and Thomas Ganslandt and Hans-Ulrich Prokosch and Klemens Budde and Danilo Schmidt and Carl Hinrichs and Thomas Wittenberg and Philipp Daumke and Patricia G. Oppelt},
url = {https://doi.org/10.1007/s00287-015-0913-x https://www.dfki.de/fileadmin/user_upload/import/11490_The_Clinical_Data_Intelligence_Project_-_A_smart_data_initiative.pdf}
}
A. Prange und D. Sonntag, "Easy Deployment of Spoken Dialogue Technology on Smartwatches for Mental Healthcare" in Proc. Pervasive Computing Paradigms for Mental Health - 5th International Conference, MindCare 2015, Milan, Italy, September 24-25, 2015, Revised Selected Papers. International Symposium on Pervasive Computing Paradigms for Mental Health (MindCare-2015), 2015.
doi: 10.1007/978-3-319-32270-4_15
@inproceedings{pub11231, series = {Communications in Computer and Information Science},
abstract = {Smartwatches are becoming increasingly sophisticated and popular as several major smartphone manufacturers, including Apple, have released their new models recently. We believe that these devices can serve as smart objects for people suffering from mental disorders such as memory loss. In this paper, we describe how to utilise smartwatches to create intelligent user interfaces that can be used to provide cognitive assistance in daily life situations of dementia patients. By using automatic speech recognisers and text-to-speech synthesis, we create a dialogue application that allows patients to interact through natural language. We compare several available libraries for Android and show an example of integrating a smartwatch application into an existing healthcare infrastructure.},
year = {2015},
title = {Easy Deployment of Spoken Dialogue Technology on Smartwatches for Mental Healthcare},
booktitle = {Pervasive Computing Paradigms for Mental Health - 5th International Conference, MindCare 2015, Milan, Italy, September 24-25, 2015, Revised Selected Papers. International Symposium on Pervasive Computing Paradigms for Mental Health (MindCare-2015)},
volume = {604},
pages = {150-156},
publisher = {Springer},
doi = {10.1007/978-3-319-32270-4_15},
author = {Alexander Prange and Daniel Sonntag},
keywords = {Smartwatch, Speech dialogue, Text-to-speech, Automatic speech recognition, Mental health},
url = {https://doi.org/10.1007/978-3-319-32270-4_15 https://www.dfki.de/fileadmin/user_upload/import/11231_Easy_Deployment_of_Spoken_Dialogue_Technology_on_Smartwatches_for_Mental_Healthcare.pdf}
}
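Platform aside, the dialogue application in pub11231 is a recognise-interpret-speak cycle. The sketch below stubs Android's SpeechRecognizer and TextToSpeech with console I/O, and the reminder intent is an invented example, not the paper's dialogue model.

def recognise_speech():
    # Stands in for Android's SpeechRecognizer on the smartwatch.
    return input("patient says: ")

def speak(text):
    # Stands in for TextToSpeech synthesis on the watch.
    print(f"[watch speaks] {text}")

def handle(utterance):
    # Toy intent handling; a real system maps utterances to care tasks.
    if "remind" in utterance.lower():
        return "Okay, I will remind you to take your medication at noon."
    return "I did not understand. Could you repeat that, please?"

while True:
    heard = recognise_speech()
    if heard.strip().lower() in {"quit", ""}:
        break
    speak(handle(heard))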
A. Prange, I. P. Sandrala, M. Weber, and D. Sonntag, "Robot Companions and Smartpens for Improved Social Communication of Dementia Patients" in Proceedings of the 20th International Conference on Intelligent User Interfaces Companion. International Conference on Intelligent User Interfaces (IUI-2015), New York, NY, USA, 2015.
doi: 10.1145/2732158.2732174
@inproceedings{pub11232, series = {IUI Companion '15},
abstract = {In this demo paper we describe how a digital pen and a humanoid robot companion can improve the social communication of a dementia patient. We propose the use of NAO, a humanoid robot, as a companion for the dementia patient in order to continuously monitor his or her activities and provide cognitive assistance in daily life situations. For example, patients can communicate with NAO through natural language via the speech dialogue functionality we integrated. Most importantly, to improve communication, i.e., sending digital messages (texting, emails), we propose the use of a smartpen: patients write messages on normal paper with an invisible dot pattern, which initiates hand-writing and sketch recognition in real time. The smartpen application is embedded into the human-robot speech dialogue.},
year = {2015},
title = {Robot Companions and Smartpens for Improved Social Communication of Dementia Patients},
booktitle = {Proceedings of the 20th International Conference on Intelligent User Interfaces Companion. International Conference on Intelligent User Interfaces (IUI-2015), New York, NY, USA},
isbn = {9781450333085},
publisher = {Association for Computing Machinery},
doi = {10.1145/2732158.2732174},
author = {Alexander Prange and Indra Praveen Sandrala and Markus Weber and Daniel Sonntag},
keywords = {pen/ink interface, healthcare, reality orientation dialogue, speech dialogue, design, realtime interaction},
url = {https://doi.org/10.1145/2732158.2732174 https://www.dfki.de/fileadmin/user_upload/import/11232_Robot_Companions_and_Smartpens_for_Improved_Social_Communication_of_Dementia_Patients.pdf}
}
A. Prange, T. Toyama, and D. Sonntag, "Towards Gaze and Gesture Based Human-Robot Interaction for Dementia Patients" in Proc. 2015 AAAI Fall Symposia, Arlington, Virginia, USA, November 12-14, 2015. AAAI Fall Symposium (AAAI-2015), November 12-14, 2015.
@inproceedings{pub11233, abstract = {Gaze and gestures are important modalities in human-human interactions and hence important to human-robot interaction. We describe how to use human gaze and robot pointing gestures to disambiguate and extend a human-robot speech dialogue developed for aiding people suffering from dementia.},
year = {2015},
title = {Towards Gaze and Gesture Based Human-Robot Interaction for Dementia Patients},
booktitle = {2015 AAAI Fall Symposia, Arlington, Virginia, USA, November 12-14, 2015. AAAI Fall Symposium (AAAI-2015), November 12-14},
pages = {111-113},
publisher = {AAAI Press},
author = {Alexander Prange and Takumi Toyama and Daniel Sonntag},
keywords = {Human-Robot Interaction, Gaze-Based HRI, Gaze, Pointing Gesture, Dementia Patients},
url = {https://www.dfki.de/fileadmin/user_upload/import/11233_Towards_Gaze,_Gesture_and_Speech-Based.pdf https://cdn.aaai.org/ocs/11696/11696-51323-1-PB.pdf}