<?xml version="1.0" encoding="iso-8859-1" standalone="no"?>
<!DOCTYPE GmsArticle SYSTEM "http://www.egms.de/dtd/2.0.34/GmsArticle.dtd">
<GmsArticle xmlns:xlink="http://www.w3.org/1999/xlink">
  <MetaData>
    <Identifier>zma001344</Identifier>
    <IdentifierDoi>10.3205/zma001344</IdentifierDoi>
    <IdentifierUrn>urn:nbn:de:0183-zma0013448</IdentifierUrn>
    <ArticleType>article</ArticleType>
    <TitleGroup>
      <Title language="en">Learning to diagnose collaboratively: validating a simulation for medical students</Title>
    </TitleGroup>
    <CreatorList>
      <Creator>
        <PersonNames>
          <Lastname>Radkowitsch</Lastname>
          <LastnameHeading>Radkowitsch</LastnameHeading>
          <Firstname>Anika</Firstname>
          <Initials>A</Initials>
        </PersonNames>
        <Address>Ludwig-Maximilians-Universit&#228;t M&#252;nchen, Lehrstuhl f&#252;r Empirische P&#228;dagogik, Department Psychologie, Department Psychologie und Munich Center of the Learning Sciences, Leopoldstr. 13, D-80802 M&#252;nchen, Germany<Affiliation>Ludwig-Maximilians-Universit&#228;t M&#252;nchen, Munich Center of the Learning Sciences, M&#252;nchen, Germany</Affiliation><Affiliation>Ludwig-Maximilians-Universit&#228;t M&#252;nchen, Lehrstuhl f&#252;r Empirische P&#228;dagogik, Department Psychologie, M&#252;nchen, Germany</Affiliation></Address>
        <Email>anika.radkowitsch&#64;psy.lmu.de</Email>
        <Creatorrole corresponding="yes" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Fischer</Lastname>
          <LastnameHeading>Fischer</LastnameHeading>
          <Firstname>Martin R.</Firstname>
          <Initials>MR</Initials>
          <AcademicTitle>Prof. Dr. med.</AcademicTitle>
        </PersonNames>
        <Address>
          <Affiliation>Ludwig-Maximilians-Universit&#228;t M&#252;nchen, Munich Center of the Learning Sciences, M&#252;nchen, Germany</Affiliation>
          <Affiliation>Ludwig-Maximilians-Universit&#228;t M&#252;nchen, LMU Klinikum, Institut f&#252;r Didaktik und Ausbildungsforschung in der Medizin, M&#252;nchen, Germany</Affiliation>
        </Address>
        <Email>martin.fischer&#64;med.uni-muenchen.de</Email>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Schmidmaier</Lastname>
          <LastnameHeading>Schmidmaier</LastnameHeading>
          <Firstname>Ralf</Firstname>
          <Initials>R</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Ludwig-Maximilians-Universit&#228;t M&#252;nchen, Munich Center of the Learning Sciences, M&#252;nchen, Germany</Affiliation>
          <Affiliation>Ludwig-Maximilians-Universit&#228;t M&#252;nchen, LMU Klinikum, Medizinische Klinik und Poliklinik IV, M&#252;nchen, Germany</Affiliation>
        </Address>
        <Email>ralf.schmidmaier&#64;med.uni-muenchen.de</Email>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
      <Creator>
        <PersonNames>
          <Lastname>Fischer</Lastname>
          <LastnameHeading>Fischer</LastnameHeading>
          <Firstname>Frank</Firstname>
          <Initials>F</Initials>
        </PersonNames>
        <Address>
          <Affiliation>Ludwig-Maximilians-Universit&#228;t M&#252;nchen, Munich Center of the Learning Sciences, M&#252;nchen, Germany</Affiliation>
          <Affiliation>Ludwig-Maximilians-Universit&#228;t M&#252;nchen, Lehrstuhl f&#252;r Empirische P&#228;dagogik, Department Psychologie, M&#252;nchen, Germany</Affiliation>
        </Address>
        <Email>frank.fischer&#64;psy.lmu.de</Email>
        <Creatorrole corresponding="no" presenting="no">author</Creatorrole>
      </Creator>
    </CreatorList>
    <PublisherList>
      <Publisher>
        <Corporation>
          <Corporatename>German Medical Science GMS Publishing House</Corporatename>
        </Corporation>
        <Address>D&#252;sseldorf</Address>
      </Publisher>
    </PublisherList>
    <SubjectGroup>
      <SubjectheadingDDB>610</SubjectheadingDDB>
      <Keyword language="en">collaboration</Keyword>
      <Keyword language="en">simulation</Keyword>
      <Keyword language="en">collaborative diagnostic reasoning</Keyword>
      <Keyword language="en">validation</Keyword>
      <SectionHeading language="en">Making Diagnoses</SectionHeading>
    </SubjectGroup>
    <DateReceived>20181111</DateReceived>
    <DateRevised>20200424</DateRevised>
    <DateAccepted>20200629</DateAccepted>
    <DatePublishedList>
      
    <DatePublished>20200915</DatePublished></DatePublishedList>
    <Language>engl</Language>
    <License license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/4.0/">
      <AltText language="en">This is an Open Access article distributed under the terms of the Creative Commons Attribution 4.0 License.</AltText>
      <AltText language="de">Dieser Artikel ist ein Open-Access-Artikel und steht unter den Lizenzbedingungen der Creative Commons Attribution 4.0 License (Namensnennung).</AltText>
    </License>
    <SourceGroup>
      <Journal>
        <ISSN>2366-5017</ISSN>
        <Volume>37</Volume>
        <Issue>5</Issue>
        <JournalTitle>GMS Journal for Medical Education</JournalTitle>
        <JournalTitleAbbr>GMS J Med Educ</JournalTitleAbbr>
      </Journal>
    </SourceGroup>
    <ArticleNo>51</ArticleNo>
    <Fundings>
      <Funding fundId="FI 792/11-1">Deutsche Forschungsgemeinschaft (DFG)</Funding>
    </Fundings>
  </MetaData>
  <OrigData>
    <Abstract language="en" linked="yes"><Pgraph><Mark1>Objectives: </Mark1>Physicians with different professional backgrounds often diagnose a patient&#8217;s problem collaboratively. In this article, we first introduce a process model for collaborative diagnosing (CDR model), and describe the development of a simulation used to empirically examine the facilitation of collaborative diagnostic reasoning. Based on a contemporary validity framework <TextLink reference="1"></TextLink>, we further suggest indicators for validity and collect initial evidence with respect to the scoring, generalization, extrapolation, and implication inferences to assess the validity of the simulation when used to assess effects of learning interventions.  </Pgraph><Pgraph><Mark1>Method:</Mark1> In a quasi-experimental study, we assessed objectivity and reliability of the simulation and compared medical students with low and advanced prior knowledge to practitioners with high prior knowledge with respect to their diagnostic accuracy, diagnostic efficiency, information sharing skills, and their intrinsic cognitive load. Additionally, we obtained authenticity ratings from practitioners with high prior knowledge. </Pgraph><Pgraph><Mark1>Results: </Mark1>The results yielded satisfying initial evidence for the validity of the scoring and the extrapolation inferences as ratings are objective, and the simulation and the collaborative process are perceived as rather authentic. Additionally, participants on different levels of prior knowledge differ with respect to their diagnostic accuracy, diagnostic efficiency, information sharing skills, and their reported intrinsic cognitive load. With one exception (information sharing skills), the generalization inference seems to be valid as well. </Pgraph><Pgraph><Mark1>Conclusions: </Mark1>We conclude that collecting validity evidence for the simulation was an important step towards a better interpretation of the simulation. 
We found that the simulation is an authentic and valid representation of the chosen collaborative situation and that the collected validity evidence offers sufficient evidence for an initial validation of the simulation. Nevertheless, the validation process highlighted some important gaps that need further consideration. We further conclude that applying a validation model to the context of empirical research is promising and encourage other researchers to follow the example. </Pgraph></Abstract>
    <TextBlock linked="yes" name="1. Introduction">
      <MainHeadline>1. Introduction</MainHeadline><Pgraph>In their daily practice, physicians with different professional backgrounds often diagnose patients&#8217; problems collaboratively. For example, an internist diagnosing a patient suffering from fever and shortness of breath might consult a radiologist to conduct a CT scan the results of which will be discussed afterwards. In those situations, physicians need to be able to diagnose individually, that means being able to gather and integrate case-specific information with the goal to reduce uncertainty to make a medical decision <TextLink reference="2"></TextLink>. But they also need collaborative competences such as sharing of relevant information, negotiation, and coordination skills <TextLink reference="3"></TextLink>. A recent review shows that collaborative diagnostic reasoning has been scarcely investigated empirically yet <TextLink reference="4"></TextLink>. The available empirical literature demonstrates that physicians often have difficulties to diagnose collaboratively. For example, the quality of the distribution and exchange of information among team members <TextLink reference="5"></TextLink> and the experience of team members <TextLink reference="6"></TextLink> seem to be key predictors for the quality of collaborative diagnostic reasoning. Such difficulties in information sharing also could affect the quality of subsequent negotiation processes. For instance, if an internist fails to share differential diagnoses and the respective symptoms, the radiologist will have a much harder time to interpret and to discuss the radiologic findings. Offering instructional support to foster collaborative diagnostic reasoning and in particular information sharing, therefore, seems vital. 
Simulation-based learning is an established method to foster complex competences and its effectiveness has been meta-analytically examined for health professions <TextLink reference="7"></TextLink> as well as across domains <TextLink reference="8"></TextLink> although it seems that scaffolding beyond mere problem-solving is beneficial for learning <TextLink reference="9"></TextLink>. We developed, therefore, a simulation with the goal to identify instructional conditions under which simulations effectively advance collaborative diagnostic reasoning. Importantly, training and assessment of competences presupposes evidence of its validity. We follow Kane&#8217;s <TextLink reference="1"></TextLink> validity framework for the validation of instruments as suggested by Cook and Hatala <TextLink reference="10"></TextLink>. </Pgraph><Pgraph>In this paper, we want to collect initial evidence for validity of the simulation by constructing a validity argument for a simulation used to conduct experiments on the facilitation of collaborative diagnostic reasoning. For that, we first elaborate on a model of collaborative diagnostic reasoning and describe how simulations can be used to assess and facilitate complex competences. We further explain our validation approach based on Kane&#8217;s <TextLink reference="1"></TextLink> framework as well as validity indicators that are based on theory. Afterwards, we shortly describe the development of our simulation which included several evaluation and revision cycles (cf. <TextLink reference="11"></TextLink>). Finally, we present a validation study that was conducted to analyze the validity indicators and discuss the extent to which the results add to our validity argument.  </Pgraph></TextBlock>
    <TextBlock linked="yes" name="2. Collaborative diagnostic reasoning">
      <MainHeadline>2. Collaborative diagnostic reasoning</MainHeadline><Pgraph>Collaborative diagnostic reasoning means to accurately and efficiently diagnose a patient&#8217;s problem by generating and evaluating evidences and hypotheses that can be shared with, elicited from, or negotiated among collaborators <TextLink reference="12"></TextLink>. In the medical and psychological literature, however, diagnosing has been largely conceptualized as individual competence and by using varying terms such as clinical or diagnostic reasoning, clinical decision-making, or clinical problem-solving (e.g., <TextLink reference="13"></TextLink>, <TextLink reference="14"></TextLink>). When diagnosing individually, physicians generate and evaluate evidence based on patient information, weigh the evidence with respect to differential hypotheses and draw conclusions (i.e., make a medical decision) based on the diagnostic process <TextLink reference="14"></TextLink>, <TextLink reference="15"></TextLink>. The quality of individual diagnostic activities is influenced by professional medical strategic and conceptual knowledge <TextLink reference="16"></TextLink>. However, more than one diagnostician is often involved in diagnosing a patient or making treatment decisions. For example, in medical consultations a responsible physician calls in the expertise of another health-care professional. Another example are discussion rounds such as tumor boards in which physicians with different professional backgrounds exchange and discuss patient information. In both examples, diagnosticians have the joint goal to make the best clinical decision. When diagnosing collaboratively, the professional medical knowledge, the knowledge about the patient, and outcomes of diagnostic reasoning processes might differ between the diagnosticians. Therefore, collaborative activities are necessary in addition to the individual diagnostic activities to coordinate the individuals&#8217; diagnostic processes. 
Based on the collaborative problem-solving framework by Liu and colleagues <TextLink reference="3"></TextLink> and the scientific discovery as dual search (SDDS) model <TextLink reference="17"></TextLink>, Radkowitsch and colleagues <TextLink reference="12"></TextLink> proposed a model for collaborative diagnostic reasoning (CDR model, see figure 1 <ImgLink imgNo="1" imgType="figure"/>) describing collaborative diagnostic processes with individual and collaborative diagnostic activities. These collaborative activities are sharing, elicitation, negotiation, and coordination. According to the CDR model, evidences and hypotheses generated and evaluated during diagnostic processes are kept in individual diagnostic spaces (dashed lines and boxes). All evidences and hypotheses that are available to all collaborators are represented in shared diagnostic spaces (dotted boxes). For evidences and hypotheses to become part of a shared diagnostic space, the diagnosticians need to conduct the proposed collaborative activities (dotted lines). For example, an internist diagnosing a patient suffering from fever and shortness of breath might generate the hypothesis of pneumonia. In order to reduce the uncertainty of this hypothesis, the internist consults a radiologist to perform a radiologic test. The quality and relevance of the information that the internist shares with the radiologist may influence the hypotheses generated and the conclusions drawn by the radiologist and further affect which information is shared, negotiated or elicited by the radiologist. In turn, the evidences and hypotheses shared, elicited, or negotiated by the radiologist may influence the internist&#8217;s individual diagnostic process. Hence, the proposed collaborative activities are considered important for the quality of medical decisions. 
Based on models and findings on team cognition, we assume that the quality of collaborative activities is influenced by the team members&#8217; meta-knowledge <TextLink reference="18"></TextLink>, <TextLink reference="19"></TextLink>. By meta-knowledge we mean the knowledge a team member holds about the other team members&#8217; roles, their knowledge, and their task. Meta-knowledge has been shown to particularly influence collaborative activities of collaborators (e.g., <TextLink reference="20"></TextLink>). Among collaborative activities, information sharing has received particular attention. Sharing or rather the lack of sharing can affect the accuracy of the diagnoses, but at the same time diagnosticians often fail to share relevant information with others <TextLink reference="5"></TextLink>, <TextLink reference="21"></TextLink>. </Pgraph></TextBlock>
    <TextBlock linked="yes" name="3. Conducting research on advancing collaborative diagnostic reasoning with simulations">
      <MainHeadline>3. Conducting research on advancing collaborative diagnostic reasoning with simulations</MainHeadline><Pgraph>Simulations are an established method to foster competences in medical education as well as in other educational contexts such as teacher trainings <TextLink reference="22"></TextLink>, pilot trainings <TextLink reference="23"></TextLink>, or military trainings <TextLink reference="24"></TextLink>. In all these contexts, the application of knowledge is a crucial part of professional practice &#91;e.g., <TextLink reference="25"></TextLink>&#93;. Simulations allow to practice the application of knowledge in a risk-free environment <TextLink reference="26"></TextLink>. More importantly, however, simulations allow for the deliberate practice <TextLink reference="27"></TextLink> of particularly difficult or complex subtasks. That means that within simulations, learners can repeatedly solve (sub-)tasks that they are yet not able to complete. Research on the deliberate practice has shown that this type of practice is particularly crucial during the development of professional expertise <TextLink reference="27"></TextLink>. Besides, the application of knowledge in complex domains such as medicine can be overwhelming for learners. To facilitate learning, the complexity of these situations can be reduced in simulations and thereby offer a tradeoff between an approximation-of-practice and authentic representations of real-world situations <TextLink reference="28"></TextLink>. Research on the effectiveness of simulations shows positive effects on cognitive, behavioral, and affective learning outcomes in medicine as well as in other domains <TextLink reference="7"></TextLink>, <TextLink reference="8"></TextLink>, <TextLink reference="29"></TextLink>. 
However, a recent review shows that to advance diagnostic competences, the provision of additional instructional support beyond the opportunity to solve problems is beneficial <TextLink reference="9"></TextLink>. We propose a research agenda to investigate conditions under which diagnostic competences are effectively advanced when learning with simulations <TextLink reference="2"></TextLink>. For empirical laboratory research on complex competences it is necessary to focus on empirically measurable aspects. Hence, we focus on information sharing as subskill of collaborative diagnostic reasoning. </Pgraph><Pgraph>When conducting research on the effectiveness of different instructional means, educational research typically uses controlled experiments. That means that two or more groups of learners receive different types of support in an intervention phase. By using unsupported pre- and post-tests, the learning gain of the different groups of learners is assessed &#91;e.g., <TextLink reference="30"></TextLink>&#93;. The average performance of groups is then compared to identify the effects of the intervention. To realize the proposed research agenda <TextLink reference="2"></TextLink>, we developed a simulation that will be used in experiments to facilitate but also to assess collaborative diagnostic reasoning, in particular the sharing of information during diagnosing. During the intervention, learners will receive different versions of the simulation. During the pre- and posttest, the simulation will be used to assess the competence levels of groups of learners. Hence, it is an important prerequisite that the simulation differentiates between different competence levels, as well as that the simulation is suitable for the competence level of the targeted group. Using simulations for the assessment of competences is a common approach in medical education <TextLink reference="31"></TextLink>. 
For example, simulations are used to assess procedural skills such as conducting rectal examinations <TextLink reference="32"></TextLink>, medical communication skills <TextLink reference="33"></TextLink>, or diagnostic reasoning <TextLink reference="34"></TextLink>. When using simulations to assess competences, it is highly relevant that the simulation consists of authentic representations of real-world situations in which the respective competences are typically used <TextLink reference="31"></TextLink>, <TextLink reference="35"></TextLink>. For assessing diagnostic reasoning skills, simulations usually present patient cases for which learners need to come up with the most likely diagnosis <TextLink reference="31"></TextLink>. A systematic review on simulations shows that the evaluation of simulations with respect to their validity as assessment tool lacks thoroughness <TextLink reference="36"></TextLink>. Therefore, in the present paper we seek to examine whether the simulation developed to realize our research agenda is a valid instrument for the assessment of between group differences of competence levels. </Pgraph></TextBlock>
    <TextBlock linked="yes" name="4. Validating a simulation of collaborative diagnostic reasoning: constructing a validity argument">
      <MainHeadline>4. Validating a simulation of collaborative diagnostic reasoning: constructing a validity argument</MainHeadline><Pgraph>In his validity framework, Kane <TextLink reference="1"></TextLink> describes validation as the process to collect and to evaluate validity evidence to judge the appropriateness of interpretations of the results of the assessment. Four typical inferences are drawn when concluding from a test score to a real score which need critical examination with respect to their validity: scoring, generalization, extrapolation, and implications. Each of these inferences are typically based on implicit assumptions that need to be considered during a validation process <TextLink reference="10"></TextLink>, <TextLink reference="37"></TextLink>. In this paper, we explicate these assumptions for the simulation-based assessment of collaborative diagnostic reasoning that has the goal to identify conditions under which collaborative diagnostic reasoning can be effectively facilitated. All considered assumptions as well as their warrants are listed in table 1 <ImgLink imgNo="1" imgType="table"/>. The first inference, scoring, refers to matching an observation to a single score <TextLink reference="38"></TextLink>. For example, in our simulation a medical student proposes a diagnosis for a patient case which is then scored by the experimenter. A valid scoring procedure requires the observations to be correctly transformed into a consistent score and that raters of the accuracy of the final diagnoses show reliable ratings as indicated by high inter-rater agreements (assumption 1.1). The second inference, generalization, refers to generalizing the single score to a test score <TextLink reference="38"></TextLink>. In our simulation, we generalize from the information sharing skills shown in one simulated patient case to the information sharing skills shown in several other simulated patient cases. 
A valid generalization inference is shown, if scores on a single performance (e.g., a final diagnosis of one patient) align with an overall score (e.g., all final diagnoses given during the test setting). Hence, high internal consistency of the measures is an indicator for plausible generalization inferences (assumption 2.1). Extrapolation refers to generalizing from the test score to the real performance <TextLink reference="38"></TextLink>. In our simulation, we would hope that medical students who are better in collaborative diagnostic reasoning in our simulation would also be better in collaborative diagnostic reasoning when working with real patients and colleagues. Hence, validity evidence should ideally show that the collaborative diagnostic reasoning of groups of learners shown within our simulations is representative for their collaborative diagnostic reasoning outside the simulation. To ensure that, we propose several validity indicators: First of all, it would be strong evidence for a valid extrapolation inference if experienced practitioners from the field rated the simulation as authentic (assumption 3.1) <TextLink reference="35"></TextLink>. We consider experienced practitioners able to judge whether the simulated setting represents real life practices. Secondly, a valid assessment requires that medical practitioners and medical students with high prior knowledge show better test performance (i.e., more accurate and more efficient diagnostic performance) compared to medical students with low prior knowledge (assumption 3.2). The assumption is that on average those showing higher performance in real life settings also show higher performance within the simulation. A third validity indicator for the extrapolation inference are differences between persons with different levels of prior knowledge with respect to cognitive load. The cognitive load theory assumes that learning imposes different kinds of cognitive load on learners. 
Particularly, the intrinsic cognitive load which is caused by the complexity of the learning material should be lower for people with high prior knowledge compared to less knowledgeable medical students <TextLink reference="39"></TextLink>. With higher prior knowledge, the learning material becomes less complex as the material is better cognitively organized and, therefore, imposes less intrinsic load (assumption 3.3). Importantly, to assess the effectiveness of different kinds of simulations, we compare groups of learners rather than individuals. That means that all decisions will be based on group means rather than individual test results. Therefore, a further assumption is that differences between groups of learners result from the intervention and not from random or systematic prior differences between groups (assumption 3.4). Therefore, it is important to use an experimental approach. The final inference, implications, refers to the conclusions drawn, and decisions made based on the test results <TextLink reference="1"></TextLink>, <TextLink reference="10"></TextLink>, <TextLink reference="38"></TextLink>. Hence, the final assumption is that the resulting data can be used to draw inferences on the effectiveness of different kinds of simulations (assumption 4.1). If the prior assumptions were met, then the implications drawn from the results would be valid. </Pgraph><Pgraph>Considering the intended use of the instrument to be validated is important for the construction of a validity argument as this helps to prioritize the evidence <TextLink reference="10"></TextLink>. The intended use of the simulation described in this paper is to assess collaborative diagnostic reasoning of groups of learners in experimental studies. Although every described validity evidence is considered important for the construction of the validity argument, some of the evidences are considered crucial. 
For our intended use, we argue that particularly the identification of different levels of competence among participants with different levels of prior knowledge would offer the most important validity evidence as this evidence is closest to the final use of the simulation. Although due to content specificity of diagnostic skills, it seems hard to achieve reliable measures in medicine <TextLink reference="13"></TextLink>, <TextLink reference="31"></TextLink>, it is particularly important to have coherent measures that allow generalizing from one item to another as this would offer evidence that the same skill is assessed in different items. </Pgraph></TextBlock>
    <TextBlock linked="yes" name="5. Research questions of the validation study">
      <MainHeadline>5. Research questions of the validation study</MainHeadline><Pgraph>Based on the validity framework and the validity indicators described above, we conducted a validation study to answer the following research questions: </Pgraph><Pgraph><OrderedList><ListItem level="1" levelPosition="1" numString="1.">Scoring: To what extent are the measures of collaborative diagnostic reasoning objective&#63; </ListItem><ListItem level="1" levelPosition="2" numString="2.">Generalization: To what extent are the measures of collaborative diagnostic reasoning consistent&#63; </ListItem><ListItem level="1" levelPosition="3" numString="3.">Extrapolation: </ListItem><OrderedList><ListItem level="2" levelPosition="1" numString="1.">To what extent do medical practitioners perceive the simulation as authentic&#63;</ListItem><ListItem level="2" levelPosition="2" numString="2.">To what extent do groups with different levels of prior knowledge differ with respect to a) their collaborative diagnostic reasoning (information sharing skills, diagnostic efficiency, and diagnostic accuracy) within the simulation and b) to the reported intrinsic cognitive load&#63;</ListItem></OrderedList></OrderedList></Pgraph></TextBlock>
    <TextBlock linked="yes" name="6. Method">
      <MainHeadline>6. Method</MainHeadline><SubHeadline2>6.1. Development of the simulation to assess collaborative diagnostic reasoning </SubHeadline2><Pgraph>Our goal is to develop a tool for the assessment of the specific subskills of collaborative diagnostic reasoning as defined above. We chose a simulation-based approach to assess collaborative diagnostic reasoning <TextLink reference="7"></TextLink>, <TextLink reference="8"></TextLink>. As described above, the construct of collaborative diagnostic reasoning is rather broad and can be assessed in a broad range of contexts. For example, different physicians such as internists, surgeons, or gynecologists could collaborate with nurses or other health-related professionals. We assume that the context of collaboration (such as the meta-knowledge about the collaborators&#8217; profession) influences collaborative diagnostic processes. We, therefore, decided to narrow down the simulated context to a situation that is relevant in real-world practices and particularly difficult for learners. Hence, we defined the simulated context as a collaborative situation between internists and radiologists based on practitioners&#8217; experiences. Interviews with seven practitioners from both disciplines were conducted to identify a specific situation that is considered as being problematic frequently. The interviews yielded that the main problem is unspecific test requests, that is unprecise justifications for the test (e.g., missing relevant patient information) and a lack of clustering of patient information. As a consequence, we decided to focus on information sharing during the request of a radiologic examination as an important and specific aspect of collaborative diagnostic reasoning. Next, we decided to use a computer-based simulation and chose the case-based learning platform CASUS (https:&#47;&#47;www.instruct.eu&#47;). 
Computer-based simulations have several advantages compared to other types of simulations such as standardized patients (e.g., <TextLink reference="33"></TextLink>). First, the use of the simulation is extremely economical once the material is developed as several participants can interact with the simulation at the same time and, for example, no actors are needed. Secondly, web-based simulations are easily accessible for participants and, hence, time and place restrictions are low. Thirdly, all case material as well as instructions are standardized and, therefore, do not confound the assessment. To develop the simulation, paper prototypes of the scenario and patient cases were constructed and evaluated by an expert committee from medicine, software development, and psychology. Whereas internists, radiologists, and a general practitioner developed the case material for ten patient cases, a software developer programmed the simulation. The case material was then evaluated and revised in a one-day expert-workshop, with focus on the case structure, the most plausible solution, as well as the sample solution. Finally, the simulation was implemented on the CASUS learning platform (see figure 2 <ImgLink imgNo="2" imgType="figure"/>). </Pgraph><Pgraph>In a pilot study, the simulation with one patient case was presented to eight medical students (<Mark2>M</Mark2><Subscript>age</Subscript>&#61;24.5, <Mark2>SD</Mark2><Subscript>Age</Subscript>&#61;3.9; <Mark2>M</Mark2><Subscript>Semester</Subscript>&#61;7.6, <Mark2>SD</Mark2><Subscript>Semester</Subscript>&#61;1.2) to evaluate the user experience of the simulation (UEQ; <TextLink reference="40"></TextLink>). Results indicated high values on the subscales attractiveness, perspicuity, stimulation, and novelty, but rather low values on the subscale dependability. 
To increase the perceived control for participants, a fiction contract containing information about the simulated scenario and the role learners are expected to take up as well as a technical familiarization giving detailed instructions on how to handle the simulation were developed. After having read the fiction contract and the familiarization, participants start the first simulated patient case. Participants first receive a patient file that they scan for symptoms and findings in the role of an internist. The patient file consists of a short patient presentation, medical history, a description of the physical examination, as well as the most important laboratory values. Afterwards, learners request a radiologic test from a simulated radiologist. For that, they are asked to fill in a request form by choosing among 42 different combinations of methods and body parts and by sharing patient information or differential diagnoses that are considered relevant for the radiologist. Only learners who appropriately justify their request (i.e., show high information sharing skills) receive a description of the radiologic findings, and, if provided by the learner, an evaluation of a specific differential diagnosis from the simulated radiologist. We decided beforehand with radiologists which information is needed to justify a specific radiologic test. After having read the radiologic result, medical students can ask questions about the radiologic findings, share further information, or request further examinations. To solve the patient case, participants suggest a diagnosis and back it up with justifying findings and suggest further differential diagnoses and treatment or diagnostic measures. For a more detailed description of the simulation and the process of development, see <TextLink reference="12"></TextLink>. 
In sum, in our simulation medical students are supposed to gather and integrate information from a patient file, and to collaboratively generate radiologic evidence by sharing relevant patient information with the radiologist. By that the medical student elicits relevant information from the radiologist, which they then integrate into prior information to arrive at a final diagnosis. Bearing in mind our definition of collaborative diagnostic reasoning, the simulation allows us to separately assess and facilitate both collaborative diagnostic reasoning (i.e., information sharing) as well as individual diagnostic reasoning (i.e., the final diagnosis). </Pgraph><SubHeadline2>6.2. Sample and design</SubHeadline2><Pgraph>A quasi-experimental study with a one-factorial design consisting of three levels (low vs advanced vs high prior knowledge level) was conducted. We defined medical students between the 5<Superscript>th</Superscript> and 8<Superscript>th</Superscript> semester (<Mark2>N</Mark2>&#61;45, <Mark2>N</Mark2><Subscript>female</Subscript>&#61;31) of a total of 12 semesters as low prior knowledge (PK) (<Mark2>M</Mark2><Subscript>PK</Subscript>&#61;6.4 semesters, <Mark2>SD</Mark2><Subscript>PK</Subscript>&#61;0.7) as they had only few courses on internal medicine and radiology according to their study plan. Medical students from the 9<Superscript>th</Superscript> semester and above (<Mark2>N</Mark2>&#61;28, <Mark2>N</Mark2><Subscript>female</Subscript>&#61;19) were categorized as advanced prior knowledge (<Mark2>M</Mark2><Subscript>PK</Subscript>&#61;11.5 semesters, <Mark2>SD</Mark2><Subscript>PK</Subscript>&#61;1.9) as they already participated in courses for internal medicine and radiology according to their study plan. 
Internists and residents for Internal Medicine after completion of the 3 years of common trunk (<Mark2>N</Mark2>&#61;25, <Mark2>N</Mark2><Subscript>female</Subscript>&#61;11) were categorized as high prior knowledge (<Mark2>M</Mark2><Subscript>PK</Subscript>&#61;13.6 years, <Mark2>SD</Mark2><Subscript>PK</Subscript>&#61;10.5) as they are expected to have practical experience. </Pgraph><SubHeadline2>6.3. Procedure</SubHeadline2><Pgraph>The study was conducted as a laboratory study with a maximum of eight participants at a time. All participants consecutively worked individually on five computer-based patient cases as described above for as long as they wanted. The participants were asked to work efficiently. After the second and the fifth case, participants completed a test measuring perceived authenticity as well as intrinsic cognitive load. Afterwards, participants were debriefed and thanked for their participation with 25&#8364;.</Pgraph><SubHeadline2>6.4. Measures</SubHeadline2><Pgraph>Within the simulation, we obtained three measures to assess the collaborative diagnostic reasoning: diagnostic accuracy, diagnostic efficiency, and information sharing skills. We used Likert-scaled items to assess the perceived authenticity of the simulation as well as the perceived intrinsic cognitive load (see table 2 <ImgLink imgNo="2" imgType="table"/>).  </Pgraph><SubHeadline3>Diagnostic accuracy</SubHeadline3><Pgraph>The solution of the patient case (i.e., the suggested final diagnosis), differential diagnoses, and further necessary diagnostic or treatment steps were used to score the diagnostic accuracy. Depending on how specific the given diagnosis was, participants received 0, 0.5 or 1 point for each diagnosis and up to one additional point each for the quality of the differential diagnoses and the quality of the indicated further steps. Points were given based on the sample solution that was developed in the expert workshop. 
The mean diagnostic accuracy across the five patient cases (ranging from 0 to 3) was calculated for each participant. </Pgraph><SubHeadline3>Diagnostic efficiency</SubHeadline3><Pgraph>The diagnostic accuracy weighted by the time needed to solve a single patient case indicated the diagnostic efficiency. The mean diagnostic efficiency across the five patient cases was calculated for each participant.</Pgraph><SubHeadline3>Information sharing skills</SubHeadline3><Pgraph>The information sharing skills were operationalized as the inverted proportion of requests rejected by the simulated radiologist due to insufficient justification per case. Whether a justification is perceived as sufficient or insufficient by the simulated radiologist was defined beforehand in collaboration with expert radiologists based on how relevant information is for a radiologist to conduct a radiologic test. For this measure, values were obtained directly via the logfiles. The mean score of all five patient cases (ranging from 0 to 1) was calculated for each participant. A mean score of 1 means that all requests in all patient cases were accepted by the radiologist.</Pgraph><SubHeadline3>Perceived authenticity</SubHeadline3><Pgraph>The perceived authenticity was assessed with three items each with respect to the overall simulation and with respect to the collaborative process <TextLink reference="41"></TextLink> on a 5-point Likert scale ranging from 1 (does not apply) to 5 (does apply). The perceived authenticity of the simulation as well as the authenticity of the collaborative process was assessed twice. An example item for authenticity is &#8220;I perceive the &#91;simulation&#93; &#47; &#91;the collaboration with the radiologist&#93; as authentic&#8221;. </Pgraph><SubHeadline3>Intrinsic cognitive load</SubHeadline3><Pgraph>Intrinsic cognitive load was assessed with one item on a 5-point Likert scale ranging from 1 (very easy) to 5 (very difficult) <TextLink reference="42"></TextLink>. 
The item text was &#8220;How easy or difficult do you find the collaboration with a radiologist at the moment&#63;&#8221;.</Pgraph><SubHeadline2>6.5. Statistical analyses</SubHeadline2><Pgraph>To answer research question 1, we obtained the intraclass correlation (ICC) based on a two-way random effects model with absolute agreement for the main diagnoses, the differential diagnoses, and the indicated further steps. For that, two raters independently coded 20&#37; of the cases. </Pgraph><Pgraph>To address research question 2, we calculated the internal consistency measure Cronbach&#8217;s alpha with respect to the diagnostic efficiency, to the information sharing skills, and to the diagnostic accuracy. </Pgraph><Pgraph>To answer research question 3.1., we calculated the mean of both measurement times and contrasted it to a threshold of 3.0 using a one-sample t-test. The means above the threshold indicate that participants with high levels of prior knowledge on average rate the overall simulation and the collaborative process as rather authentic or authentic. </Pgraph><Pgraph>To address research question 3.2., we conducted ANOVAs and Bonferroni post-hoc tests with the independent variable prior knowledge and the dependent variables diagnostic accuracy, diagnostic efficiency, information sharing skill, as well as intrinsic cognitive load. If preconditions for calculating an ANOVA were not met, we conducted the non-parametric Kruskal-Wallis-Test and Wilcoxon post-hoc tests instead. Confidence intervals are calculated with bootstrapping.  </Pgraph></TextBlock>
    <TextBlock linked="yes" name="7. Results of the validation study">
      <MainHeadline>7. Results of the validation study</MainHeadline><SubHeadline2>Scoring</SubHeadline2><Pgraph>With respect to the first research question, we obtained high values for all three variables: The interrater agreement for the quality of the final diagnoses and for the further indicated steps was ICC&#61;1. For the differential diagnoses, the interrater agreement was ICC&#61;0.94. This indicates that raters objectively scored the observations during the simulation. </Pgraph><SubHeadline2>Generalization</SubHeadline2><Pgraph>With respect to research question 2, analyses yielded a Cronbach&#8217;s alpha of .66 for the diagnostic accuracy, a Cronbach&#8217;s alpha of .53 for the diagnostic efficiency, and a Cronbach&#8217;s alpha of .33 for the information sharing skills. This indicates that the evidence for the generalization inference being valid is acceptable for the diagnostic accuracy and the diagnostic efficiency but limited for the information sharing skills. </Pgraph><SubHeadline2>Extrapolation</SubHeadline2><Pgraph>With respect to research question 3.1., participants with high prior knowledge rated the perceived authenticity of the overall simulation as <Mark2>M</Mark2>&#61;3.89 (<Mark2>SD</Mark2>&#61;0.91) and the authenticity of the simulated collaborative process as <Mark2>M</Mark2>&#61;3.57 (<Mark2>SD</Mark2>&#61;0.91). Both authenticity ratings are significantly above the threshold of 3 (<Mark2>t</Mark2>(24)&#61;4.9, <Mark2>p</Mark2>&#60;.01 and <Mark2>t</Mark2>(24)&#61;3.14, <Mark2>p</Mark2>&#60;.01). This indicates that, on average, practitioners with high levels of prior knowledge perceive the simulation as rather authentic or authentic. Concerning research question 3.2., see table 3 <ImgLink imgNo="3" imgType="table"/> for the descriptive statistics and figure 3 <ImgLink imgNo="3" imgType="figure"/>, a-d for between-group comparisons. 
The results show that the prior knowledge groups differ significantly with respect to the diagnostic accuracy (<Mark2>F</Mark2>(2,95)&#61;11.62, <Mark2>p</Mark2>&#60;.001, <Mark2>&#951;</Mark2><Mark2><Superscript>2</Superscript></Mark2>&#61;0.20). The high and advanced prior knowledge group show significantly higher accuracy than the low prior knowledge group but are not significantly different from each other. However, we found solution rates of up to 0.94 (i.e., the correctness of the final diagnosis) for three of the five patient cases indicating ceiling effects for the final diagnoses. The prior knowledge groups also differ significantly with respect to the diagnostic efficiency (<Mark2>&#967;</Mark2><Mark2><Superscript>2</Superscript></Mark2>(2)&#61;34.29, p&#60;.001, <Mark2>&#951;</Mark2><Mark2><Superscript>2</Superscript></Mark2>&#61;0.34) and with respect to the information sharing skills (<Mark2>&#967;</Mark2><Mark2><Superscript>2</Superscript></Mark2>(2)&#61;12.48, p&#60;.002, <Mark2>&#951;</Mark2><Mark2><Superscript>2</Superscript></Mark2>&#61;0.11). For both outcomes, the high and advanced prior knowledge groups again outperform the low prior knowledge group but do not differ significantly from each other. The prior knowledge groups further differ with respect to the reported intrinsic cognitive load (<Mark2>&#967;</Mark2><Mark2><Superscript>2</Superscript></Mark2>(2)&#61;38.25, p&#60;.001, <Mark2>&#951;</Mark2><Mark2><Superscript>2</Superscript></Mark2>&#61;0.38). The high prior knowledge group reported the lowest intrinsic cognitive load, followed by the advanced, and the low prior knowledge groups. All comparisons are statistically significant. </Pgraph></TextBlock>
    <TextBlock linked="yes" name="8. Discussion">
      <MainHeadline>8. Discussion</MainHeadline><Pgraph>The objective of this study was to collect initial validity evidence for the simulation we developed to conduct further experimental research on facilitating collaborative diagnostic reasoning in medical education. The validation of the simulation was based on a theoretical model describing collaborative diagnostic processes (CDR model; <TextLink reference="12"></TextLink>). The simulation focusses on one of the proposed collaborative activities, namely information sharing. The CDR model suggests that which information is shared by one diagnostician influences the diagnostic processes of another diagnostician. In case of the simulation, which information is shared by a learner in the role of an internist influences whether a radiologist conducts a radiologic test and how it is interpreted. An argument for initial validity was constructed by applying Kane&#8217;s <TextLink reference="1"></TextLink> validity framework to the context of experimental research based on a simulation. The underlying assumptions were made explicit and supported by warrants (see table 1 <ImgLink imgNo="1" imgType="table"/>). However, the strength of these warrants varies between inferences. We were able to show quite clearly that the single observations within the simulation can be assessed objectively as all materials were developed and evaluated by expert committees from different disciplines, and some of the variable scores are generated automatically (scoring). This reduces human errors during the transformation of the observation to a single score. For the variables where coding was necessary, inter-rater reliability was high. We conclude that no further evidence for the validity of the scoring procedure is necessary. 
Further, we found satisfying validity evidence for the question whether the results of the simulation can be transferred to real-world scenarios by comparing participants with different prior knowledge with respect to their performance and their indicated cognitive load in the simulation (extrapolation). We find that medical students and practitioners with high levels of prior knowledge indeed show higher information sharing skills than medical students with low levels of prior knowledge. This indicates that the simulation enables differentiating between levels of competence of different groups which is the intended use of the simulation. However, there is one exception. We found rather high solution rates for the patient cases, even with students on low levels of prior knowledge, indicating ceiling effects for the case solution included in the measures diagnostic accuracy and diagnostic efficiency. Higher case difficulty would allow to better distinguish between different levels of the competences under consideration which is why case difficulty was increased by adding further distracting information. Nevertheless, it is a recurrent finding in medical education that intermediates and experts do not differ in the accuracy of the diagnoses, but rather in the efficiency with which they come up with the correct solution <TextLink reference="31"></TextLink>. An explanation for this effect is that the knowledge of experts is better organized (i.e., encapsulation of knowledge) compared to the knowledge of intermediates. This superior organization of knowledge enables experts to more efficiently come to a correct diagnosis <TextLink reference="43"></TextLink>. This pattern of effects is illustrated in our data as the difference between intermediates and experts is descriptively larger for diagnostic efficiency than for diagnostic accuracy. Furthermore, the simulation was rated as rather authentic by practitioners from the field. 
Ultimately, when conducting experiments with the simulation to compare learning gains of groups of learners, it is of prime importance to additionally rule out prior differences between groups as confounding factors. This could be achieved by randomly distributing learners to experimental groups and by controlling for prior knowledge. Assuming that the simulation is used in randomized experiments, the validation study yielded satisfying evidence for the extrapolation inference. The weakest evidence was found for the assumption that scores from a single observation can be reliably summarized to an overall score (generalization). For two of the three variables of interest (diagnostic efficiency and diagnostic accuracy), the validity evidence is acceptable. For the information sharing skills, we obtained only low internal consistency indicating that across patient cases, learners show varying levels of information sharing quality. One explanation for the generally rather low value might be the small number of observations as the likelihood of higher reliability values increases with the number of observations. Generally, low consistency across different patients is a well-known problem in medical education and is also known as content specificity <TextLink reference="13"></TextLink>. That means that the diagnostic accuracy between patient cases correlates poorly (0.1-0.3) <TextLink reference="13"></TextLink>. That the consistency across patient cases is particularly low for collaborative diagnostic activities such as information sharing might be explained by the CDR model: Whereas individual diagnostic processes are influenced by medical knowledge, collaborative diagnostic reasoning is further influenced by the professional collaboration knowledge (e.g. meta-knowledge). For example, a student might know which information to share for a patient suffering pneumonia, but not for a patient suffering lung cancer. 
Hence, the measure for information sharing skill might be affected by both, professional medical content knowledge and professional meta-knowledge about the collaboration partners&#8217; discipline. Hence, the presented evidence for the generalization inference, particularly for information sharing skills, of our simulation gives rather limited support for the validity which is why further evidence is necessary. </Pgraph><SubHeadline2>8.1. Limitations</SubHeadline2><Pgraph>Of course, the present study is not without limitations that must be considered when interpreting its findings. First of all, the simulation is meant to represent collaborative diagnostic reasoning, however, we focus on a very specific subskill which is the sharing of information in diagnostic situations. This is a narrow focus and the results will not easily generalize to other subskills such as negotiation of differential diagnoses. However, we consider the subskill sharing as a particularly important part of collaborative diagnostic reasoning as prior literature has shown how important and how error-prone the sharing of relevant information is for the field of medicine (e.g., <TextLink reference="5"></TextLink>, <TextLink reference="21"></TextLink>). Similar findings have also been reported in other fields (e.g., <TextLink reference="20"></TextLink>, <TextLink reference="44"></TextLink>). The simulation will be used to scaffold the learning of sharing processes and we are convinced that our findings will be of use in other diagnostic situations in which sharing among diagnosticians is necessary as well. </Pgraph><Pgraph>Additionally, our validity argument is based to a large extent on a comparison between experts and novices. Such comparisons have been criticized as novices and experts differ in several variables which are oftentimes unrelated to the construct under investigation such as the probability of having grey hair (&#34;grey hair index&#34;, <TextLink reference="45"></TextLink>, p. 
830). However, we do not intend to argue that the expert-novice comparison shows that we are actually measuring the construct of interest. Instead, we argue that the expert-novice comparison shows that we are able to measure competence differences between groups using the simulation. Also, the intended use of the simulation is not to make judgements about individual competences of learners but rather to compare learning gains of groups to make judgements about the simulation&#8217;s effectiveness under different instructional conditions. Therefore, we consider the results of comparisons between different levels of prior knowledge as a meaningful contribution to our validation argument. </Pgraph></TextBlock>
    <TextBlock linked="yes" name="9. Conclusion">
      <MainHeadline>9. Conclusion</MainHeadline><Pgraph>In this article, we presented the collection of initial validity evidences for the simulation which we developed to investigate the facilitation of collaborative diagnostic reasoning &#8211; and more particularly information sharing &#8211; with simulations. Our validation process allows concluding that the simulation that was developed based on theory is indeed authentic enough with respect to both diagnostic process and collaboration. Importantly, more advanced students and practitioners are more efficient than students in earlier phases of their studies and experience less intrinsic cognitive load. More knowledgeable learners are also better able to interact successfully with the simulated radiologist. Thus, we were able to find initial validity evidence that the simulation can be used to assess whether interventions differ in their impact on the learning of collaborative diagnostic reasoning. With respect to the assessment of the information sharing skills as subcomponent of the collaborative diagnostic reasoning there is, however, a need for improvement concerning the reliability. As the reliability of assessments is considered one of the most important evidence components, this is still an important gap in the validity argument. Refining the measurement and increasing the number of observations might help to close this gap. </Pgraph><Pgraph>Collecting validity evidence about simulations for diagnostic reasoning still seems uncommon <TextLink reference="36"></TextLink>. Yet, the construction of a validity argument helped us to understand the strength and weaknesses of the simulation for its intended use. This is an important step and will help us to interpret the results of planned experiments. Besides some gaps in the validity argument that will be addressed further, the simulation is a solid instrument to empirically examine the advancement of collaborative diagnostic reasoning of medical students. 
</Pgraph></TextBlock>
    <TextBlock linked="yes" name="Funding">
      <MainHeadline>Funding</MainHeadline><Pgraph>The research presented in this contribution was funded by a grant of the Deutsche Forschungsgemeinschaft (DFG) to Frank Fischer, Martin R. Fischer and Ralf Schmidmaier (FI 792&#47;11-1).</Pgraph></TextBlock>
    <TextBlock linked="yes" name="Competing interests">
      <MainHeadline>Competing interests</MainHeadline><Pgraph>The authors declare that they have no competing interests. </Pgraph></TextBlock>
    <References linked="yes">
      <Reference refNo="2">
        <RefAuthor>Heitzmann N</RefAuthor>
        <RefAuthor>Seidel T</RefAuthor>
        <RefAuthor>Opitz A</RefAuthor>
        <RefAuthor>Hetmanek A</RefAuthor>
        <RefAuthor>Wecker C</RefAuthor>
        <RefAuthor>Fischer M</RefAuthor>
        <RefAuthor>Ufer S</RefAuthor>
        <RefAuthor>Schmidmaier R</RefAuthor>
        <RefAuthor>Neuhaus B</RefAuthor>
        <RefAuthor>Siebeck M</RefAuthor>
        <RefAuthor>St&#252;rmer K</RefAuthor>
        <RefAuthor>Obersteiner A</RefAuthor>
        <RefAuthor>Reiss K</RefAuthor>
        <RefAuthor>Girwidz R</RefAuthor>
        <RefAuthor>Fischer F</RefAuthor>
        <RefTitle>Facilitating diagnostic competences in simulations: A conceptual framework and a research agenda for medical and teacher education</RefTitle>
        <RefYear>2019</RefYear>
        <RefJournal>Front Learn Res</RefJournal>
        <RefPage>1-24</RefPage>
        <RefTotal>Heitzmann N, Seidel T, Opitz A, Hetmanek A, Wecker C, Fischer M, Ufer S, Schmidmaier R, Neuhaus B, Siebeck M, St&#252;rmer K, Obersteiner A, Reiss K, Girwidz R, Fischer F. Facilitating diagnostic competences in simulations: A conceptual framework and a research agenda for medical and teacher education. Front Learn Res. 2019;7(4):1-24. DOI: 10.14786&#47;flr.v7i4.384</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.14786&#47;flr.v7i4.384</RefLink>
      </Reference>
      <Reference refNo="3">
        <RefAuthor>Liu L</RefAuthor>
        <RefAuthor>Hao J</RefAuthor>
        <RefAuthor>von Davier AA</RefAuthor>
        <RefAuthor>Kyllonen P</RefAuthor>
        <RefAuthor>Zapata-Rivera D</RefAuthor>
        <RefTitle>A tough nut to crack: Measuring collaborative problem solving</RefTitle>
        <RefYear>2015</RefYear>
        <RefBookTitle>Handbook of Research on Computational Tools for Real-World Skill Development</RefBookTitle>
        <RefPage>344-359</RefPage>
        <RefTotal>Liu L, Hao J, von Davier AA, Kyllonen P, Zapata-Rivera D. A tough nut to crack: Measuring collaborative problem solving. In: Rosen Y, Ferrara S, Mosharraf M, editors. Handbook of Research on Computational Tools for Real-World Skill Development. Hershey, PA: IGI Global; 2015. p.344-359. DOI: 10.4018&#47;978-1-4666-9441-5.ch013</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.4018&#47;978-1-4666-9441-5.ch013</RefLink>
      </Reference>
      <Reference refNo="4">
        <RefAuthor>Kiesewetter J</RefAuthor>
        <RefAuthor>Fischer F</RefAuthor>
        <RefAuthor>Fischer MR</RefAuthor>
        <RefTitle>Collaborative clinical reasoning - A systematic review of empirical studies</RefTitle>
        <RefYear>2017</RefYear>
        <RefJournal>J Contin Educ Health Prof</RefJournal>
        <RefPage>123-128</RefPage>
        <RefTotal>Kiesewetter J, Fischer F, Fischer MR. Collaborative clinical reasoning - A systematic review of empirical studies. J Contin Educ Health Prof. 2017;37(2):123-128. DOI: 10.1097&#47;CEH.0000000000000158</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1097&#47;CEH.0000000000000158</RefLink>
      </Reference>
      <Reference refNo="5">
        <RefAuthor>Tschan F</RefAuthor>
        <RefAuthor>Semmer NK</RefAuthor>
        <RefAuthor>Gurtner A</RefAuthor>
        <RefAuthor>Bizzari L</RefAuthor>
        <RefAuthor>Spychiger M</RefAuthor>
        <RefAuthor>Breuer M</RefAuthor>
        <RefAuthor>Marsch SU</RefAuthor>
        <RefTitle>Explicit reasoning, confirmation bias, and illusory transactive memory: A simulation study of group medical decision making</RefTitle>
        <RefYear>2009</RefYear>
        <RefJournal>Small Group Res</RefJournal>
        <RefPage>271-300</RefPage>
        <RefTotal>Tschan F, Semmer NK, Gurtner A, Bizzari L, Spychiger M, Breuer M, Marsch SU. Explicit reasoning, confirmation bias, and illusory transactive memory: A simulation study of group medical decision making. Small Group Res. 2009;40(3):271-300. DOI: 10.1177&#47;1046496409332928</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1177&#47;1046496409332928</RefLink>
      </Reference>
      <Reference refNo="6">
        <RefAuthor>Farand L</RefAuthor>
        <RefAuthor>Lafrance JP</RefAuthor>
        <RefAuthor>Arocha JF</RefAuthor>
        <RefTitle>Collaborative problem-solving in telemedicine and evidence interpretation in a complex clinical case</RefTitle>
        <RefYear>1998</RefYear>
        <RefJournal>Int J Med Inform</RefJournal>
        <RefPage>153-167</RefPage>
        <RefTotal>Farand L, Lafrance JP, Arocha JF. Collaborative problem-solving in telemedicine and evidence interpretation in a complex clinical case. Int J Med Inform. 1998;51(2-3):153-167. DOI: 10.1016&#47;S1386-5056(98)00112-9</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1016&#47;S1386-5056(98)00112-9</RefLink>
      </Reference>
      <Reference refNo="7">
        <RefAuthor>Cook DA</RefAuthor>
        <RefAuthor>Hatala R</RefAuthor>
        <RefAuthor>Brydges R</RefAuthor>
        <RefAuthor>Zendejas B</RefAuthor>
        <RefAuthor>Szostek JH</RefAuthor>
        <RefAuthor>Wang AT</RefAuthor>
        <RefAuthor>Erwin PJ</RefAuthor>
        <RefAuthor>Hamstra SJ</RefAuthor>
        <RefTitle>Technology-enhanced simulation for health professions education: A systematic review and meta-analysis</RefTitle>
        <RefYear>2011</RefYear>
        <RefJournal>JAMA</RefJournal>
        <RefPage>978-988</RefPage>
        <RefTotal>Cook DA, Hatala R, Brydges R, Zendejas B, Szostek JH, Wang AT, Erwin PJ, Hamstra SJ. Technology-enhanced simulation for health professions education: A systematic review and meta-analysis. JAMA. 2011;306(9):978-988. DOI: 10.1001&#47;jama.2011.1234</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1001&#47;jama.2011.1234</RefLink>
      </Reference>
      <Reference refNo="8">
        <RefAuthor>Gegenfurtner A</RefAuthor>
        <RefAuthor>Quesada-Pallar&#232;s C</RefAuthor>
        <RefAuthor>Knogler M</RefAuthor>
        <RefTitle>Digital simulation-based training: A meta-analysis</RefTitle>
        <RefYear>2014</RefYear>
        <RefJournal>Br J Educ Technol</RefJournal>
        <RefPage>1097-1114</RefPage>
        <RefTotal>Gegenfurtner A, Quesada-Pallar&#232;s C, Knogler M. Digital simulation-based training: A meta-analysis. Br J Educ Technol. 2014;45(6):1097-1114. DOI: 10.1111&#47;bjet.12188</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1111&#47;bjet.12188</RefLink>
      </Reference>
      <Reference refNo="9">
        <RefAuthor>Chernikova O</RefAuthor>
        <RefAuthor>Heitzmann N</RefAuthor>
        <RefAuthor>Fink M</RefAuthor>
        <RefAuthor>Venance T</RefAuthor>
        <RefAuthor>Seidel T</RefAuthor>
        <RefAuthor>Fischer F</RefAuthor>
        <RefTitle>Facilitating diagnostic competences in higher education - A meta-analysis in medical and teacher education</RefTitle>
        <RefYear>2020</RefYear>
        <RefJournal>Educ Psychol Rev</RefJournal>
        <RefPage>157-196</RefPage>
        <RefTotal>Chernikova O, Heitzmann N, Fink M, Venance T, Seidel T, Fischer F. Facilitating diagnostic competences in higher education - A meta-analysis in medical and teacher education. Educ Psychol Rev. 2020;32:157-196. DOI: 10.1007&#47;s10648-019-09492-2</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1007&#47;s10648-019-09492-2</RefLink>
      </Reference>
      <Reference refNo="1">
        <RefAuthor>Kane MT</RefAuthor>
        <RefTitle>Validation</RefTitle>
        <RefYear>2006</RefYear>
        <RefBookTitle>Educational Measurement</RefBookTitle>
        <RefPage>17-64</RefPage>
        <RefTotal>Kane MT. Validation. In: Brennan RL, editor. Educational Measurement. Westport: Praeger; 2006. p.17-64.</RefTotal>
      </Reference>
      <Reference refNo="10">
        <RefAuthor>Cook DA</RefAuthor>
        <RefAuthor>Hatala R</RefAuthor>
        <RefTitle>Validation of educational assessments: A primer for simulation and beyond</RefTitle>
        <RefYear>2016</RefYear>
        <RefJournal>Adv Simul (Lond)</RefJournal>
        <RefPage>31</RefPage>
        <RefTotal>Cook DA, Hatala R. Validation of educational assessments: A primer for simulation and beyond. Adv Simul (Lond). 2016;1:31. DOI: 10.1186&#47;s41077-016-0033-y</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1186&#47;s41077-016-0033-y</RefLink>
      </Reference>
      <Reference refNo="11">
        <RefAuthor>Allen M</RefAuthor>
        <RefAuthor>Sites R</RefAuthor>
        <RefTitle></RefTitle>
        <RefYear>2012</RefYear>
        <RefBookTitle>Leaving ADDIE for SAM: An agile model for developing the best learning experiences</RefBookTitle>
        <RefPage></RefPage>
        <RefTotal>Allen M, Sites R. Leaving ADDIE for SAM: An agile model for developing the best learning experiences. East Peoria, IL: ASTD Press; 2012.</RefTotal>
      </Reference>
      <Reference refNo="12">
        <RefAuthor>Radkowitsch A</RefAuthor>
        <RefAuthor>Sailer M</RefAuthor>
        <RefAuthor>Fischer MR</RefAuthor>
        <RefAuthor>Schmidmaier R</RefAuthor>
        <RefAuthor>Fischer F</RefAuthor>
        <RefTitle>Diagnosing collaboratively: A theoretical model and a simulation-based learning environment</RefTitle>
        <RefYear></RefYear>
        <RefBookTitle>Learning to diagnose with simulations - Examples from teacher education and medical education</RefBookTitle>
        <RefPage></RefPage>
        <RefTotal>Radkowitsch A, Sailer M, Fischer MR, Schmidmaier R, Fischer F. Diagnosing collaboratively: A theoretical model and a simulation-based learning environment. In: Fischer F, Opitz A, editors. Learning to diagnose with simulations - Examples from teacher education and medical education. New York, NY: Springer Briefs in Education Series. accepted.</RefTotal>
      </Reference>
      <Reference refNo="13">
        <RefAuthor>Norman G</RefAuthor>
        <RefTitle>Research in clinical reasoning: Past history and current trends</RefTitle>
        <RefYear>2005</RefYear>
        <RefJournal>Med Educ</RefJournal>
        <RefPage>418-427</RefPage>
        <RefTotal>Norman G. Research in clinical reasoning: Past history and current trends. Med Educ. 2005;39(4):418-427. DOI: 10.1111&#47;j.1365-2929.2005.02127.x</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1111&#47;j.1365-2929.2005.02127.x</RefLink>
      </Reference>
      <Reference refNo="14">
        <RefAuthor>Simmons B</RefAuthor>
        <RefTitle>Clinical reasoning: Concept analysis</RefTitle>
        <RefYear>2010</RefYear>
        <RefJournal>J Adv Nurs</RefJournal>
        <RefPage>1151-1158</RefPage>
        <RefTotal>Simmons B. Clinical reasoning: Concept analysis. J Adv Nurs. 2010;66(5):1151-1158. DOI: 10.1111&#47;j.1365-2648.2010.05262.x</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1111&#47;j.1365-2648.2010.05262.x</RefLink>
      </Reference>
      <Reference refNo="15">
        <RefAuthor>Fischer F</RefAuthor>
        <RefAuthor>Kollar I</RefAuthor>
        <RefAuthor>Ufer S</RefAuthor>
        <RefAuthor>Sodian B</RefAuthor>
        <RefAuthor>Hussmann H</RefAuthor>
        <RefAuthor>Pekrun R</RefAuthor>
        <RefAuthor>Neuhaus B</RefAuthor>
        <RefAuthor>Dorner B</RefAuthor>
        <RefAuthor>Pankofer S</RefAuthor>
        <RefAuthor>Fischer M</RefAuthor>
        <RefAuthor>Strijbos JW</RefAuthor>
        <RefAuthor>Heene M</RefAuthor>
        <RefAuthor>Eberle J</RefAuthor>
        <RefTitle>Scientific reasoning and argumentation: Advancing an interdisciplinary research agenda in education</RefTitle>
        <RefYear>2014</RefYear>
        <RefJournal>Front Learn Res</RefJournal>
        <RefPage>28-45</RefPage>
        <RefTotal>Fischer F, Kollar I, Ufer S, Sodian B, Hussmann H, Pekrun R, Neuhaus B, Dorner B, Pankofer S, Fischer M, Strijbos JW, Heene M, Eberle J. Scientific reasoning and argumentation: Advancing an interdisciplinary research agenda in education. Front Learn Res. 2014(5):28-45.</RefTotal>
      </Reference>
      <Reference refNo="16">
        <RefAuthor>Schmidmaier R</RefAuthor>
        <RefAuthor>Eiber S</RefAuthor>
        <RefAuthor>Ebersbach R</RefAuthor>
        <RefAuthor>Schiller M</RefAuthor>
        <RefAuthor>Hege I</RefAuthor>
        <RefAuthor>Holzer M</RefAuthor>
        <RefAuthor>Fischer MR</RefAuthor>
        <RefTitle>Learning the facts in medical school is not enough: Which factors predict successful application of procedural knowledge in a laboratory setting&#63;</RefTitle>
        <RefYear>2013</RefYear>
        <RefJournal>BMC Med Educ</RefJournal>
        <RefPage>28</RefPage>
        <RefTotal>Schmidmaier R, Eiber S, Ebersbach R, Schiller M, Hege I, Holzer M, Fischer MR. Learning the facts in medical school is not enough: Which factors predict successful application of procedural knowledge in a laboratory setting&#63; BMC Med Educ. 2013;13:28. DOI: 10.1186&#47;1472-6920-13-28</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1186&#47;1472-6920-13-28</RefLink>
      </Reference>
      <Reference refNo="17">
        <RefAuthor>Klahr D</RefAuthor>
        <RefAuthor>Dunbar K</RefAuthor>
        <RefTitle>Dual space search during scientific reasoning</RefTitle>
        <RefYear>1988</RefYear>
        <RefJournal>Cogn Sci</RefJournal>
        <RefPage>1-48</RefPage>
        <RefTotal>Klahr D, Dunbar K. Dual space search during scientific reasoning. Cogn Sci. 1988;12:1-48. DOI: 10.1207&#47;s15516709cog1201&#95;1</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1207&#47;s15516709cog1201&#95;1</RefLink>
      </Reference>
      <Reference refNo="18">
        <RefAuthor>Wegner DM</RefAuthor>
        <RefTitle>Transactive memory: A contemporary analysis of the group mind</RefTitle>
        <RefYear>1987</RefYear>
        <RefBookTitle>Theories of group behavior</RefBookTitle>
        <RefPage>185-208</RefPage>
        <RefTotal>Wegner DM. Transactive memory: A contemporary analysis of the group mind. In: Mullen B, Goethals GR, editors. Theories of group behavior. New York, NY: Springer; 1987. p.185-208. DOI: 10.1007&#47;978-1-4612-4634-3&#95;9</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1007&#47;978-1-4612-4634-3&#95;9</RefLink>
      </Reference>
      <Reference refNo="19">
        <RefAuthor>Fiore SM</RefAuthor>
        <RefAuthor>Rosen MA</RefAuthor>
        <RefAuthor>Smith-Jentsch KA</RefAuthor>
        <RefAuthor>Salas E</RefAuthor>
        <RefAuthor>Letsky M</RefAuthor>
        <RefAuthor>Warner N</RefAuthor>
        <RefTitle>Toward an understanding of macrocognition in teams: Predicting processes in complex collaborative contexts</RefTitle>
        <RefYear>2010</RefYear>
        <RefJournal>Human Fact</RefJournal>
        <RefPage>203-224</RefPage>
        <RefTotal>Fiore SM, Rosen MA, Smith-Jentsch KA, Salas E, Letsky M, Warner N. Toward an understanding of macrocognition in teams: Predicting processes in complex collaborative contexts. Human Fact. 2010;52:203-224. DOI: 10.1177&#47;0018720810369807</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1177&#47;0018720810369807</RefLink>
      </Reference>
      <Reference refNo="20">
        <RefAuthor>Engelmann T</RefAuthor>
        <RefAuthor>Hesse FW</RefAuthor>
        <RefTitle>Fostering sharing of unshared knowledge by having access to the collaborators&#39; meta-knowledge structures</RefTitle>
        <RefYear>2011</RefYear>
        <RefJournal>Comput Hum Behav</RefJournal>
        <RefPage>2078-2087</RefPage>
        <RefTotal>Engelmann T, Hesse FW. Fostering sharing of unshared knowledge by having access to the collaborators&#39; meta-knowledge structures. Comput Hum Behav. 2011;27:2078-2087. DOI: 10.1016&#47;j.chb.2011.06.002</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1016&#47;j.chb.2011.06.002</RefLink>
      </Reference>
      <Reference refNo="21">
        <RefAuthor>Larson JR</RefAuthor>
        <RefAuthor>Christensen C</RefAuthor>
        <RefAuthor>Franz TM</RefAuthor>
        <RefAuthor>Abbott AS</RefAuthor>
        <RefTitle>Diagnosing Groups: The Pooling, Management, and Impact of Shared and Unshared Case Information in Team-Based Medical Decision Making</RefTitle>
        <RefYear>1998</RefYear>
        <RefJournal>J Person Soc Psychol</RefJournal>
        <RefPage>93-108</RefPage>
        <RefTotal>Larson JR, Christensen C, Franz TM, Abbott AS. Diagnosing Groups: The Pooling, Management, and Impact of Shared and Unshared Case Information in Team-Based Medical Decision Making. J Person Soc Psychol. 1998;75(1):93-108. DOI: 10.1037&#47;0022-3514.75.1.93</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1037&#47;0022-3514.75.1.93</RefLink>
      </Reference>
      <Reference refNo="22">
        <RefAuthor>Seidel T</RefAuthor>
        <RefAuthor>St&#252;rmer K</RefAuthor>
        <RefTitle>Modeling and Measuring the Structure of Professional Vision in Preservice Teachers</RefTitle>
        <RefYear>2014</RefYear>
        <RefJournal>Am Educ Res J</RefJournal>
        <RefPage>739-771</RefPage>
        <RefTotal>Seidel T, St&#252;rmer K. Modeling and Measuring the Structure of Professional Vision in Preservice Teachers. Am Educ Res J. 2014;51(4):739-771. DOI: 10.3102&#47;0002831214531321</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.3102&#47;0002831214531321</RefLink>
      </Reference>
      <Reference refNo="23">
        <RefAuthor>Hays RT</RefAuthor>
        <RefAuthor>Jacobs JW</RefAuthor>
        <RefAuthor>Prince C</RefAuthor>
        <RefAuthor>Salas E</RefAuthor>
        <RefTitle>Flight simulator training effectiveness: A meta-analysis</RefTitle>
        <RefYear>1992</RefYear>
        <RefJournal>Mil Psychol</RefJournal>
        <RefPage>63-74</RefPage>
        <RefTotal>Hays RT, Jacobs JW, Prince C, Salas E. Flight simulator training effectiveness: A meta-analysis. Mil Psychol. 1992;4(2):63-74. DOI: 10.1207&#47;s15327876mp0402&#95;1</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1207&#47;s15327876mp0402&#95;1</RefLink>
      </Reference>
      <Reference refNo="24">
        <RefAuthor>Smith R</RefAuthor>
        <RefTitle>The long history of gaming in military training</RefTitle>
        <RefYear>2010</RefYear>
        <RefJournal>Simul Gaming</RefJournal>
        <RefPage>6-19</RefPage>
        <RefTotal>Smith R. The long history of gaming in military training. Simul Gaming. 2010;41(1):6-19. DOI: 10.1177&#47;1046878109334330</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1177&#47;1046878109334330</RefLink>
      </Reference>
      <Reference refNo="25">
        <RefAuthor>Kolodner JL</RefAuthor>
        <RefTitle>An introduction to case-based reasoning</RefTitle>
        <RefYear>1992</RefYear>
        <RefJournal>Art Intellig Rev</RefJournal>
        <RefPage>3-34</RefPage>
        <RefTotal>Kolodner JL. An introduction to case-based reasoning. Art Intellig Rev. 1992;6(1):3-34. DOI: 10.1007&#47;BF00155578</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1007&#47;BF00155578</RefLink>
      </Reference>
      <Reference refNo="26">
        <RefAuthor>Ziv A</RefAuthor>
        <RefAuthor>Wolpe PR</RefAuthor>
        <RefAuthor>Small SD</RefAuthor>
        <RefAuthor>Glick S</RefAuthor>
        <RefTitle>Simulation-based medical education: An ethical imperative</RefTitle>
        <RefYear>2003</RefYear>
        <RefJournal>Acad Med</RefJournal>
        <RefPage>783-788</RefPage>
        <RefTotal>Ziv A, Wolpe PR, Small SD, Glick S. Simulation-based medical education: An ethical imperative. Acad Med. 2003;78(8):783-788. DOI: 10.1097&#47;00001888-200308000-00006</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1097&#47;00001888-200308000-00006</RefLink>
      </Reference>
      <Reference refNo="27">
        <RefAuthor>Ericsson KA</RefAuthor>
        <RefTitle>Deliberate practice and the acquisition and maintenance of expert performance in medicine and related domains</RefTitle>
        <RefYear>2004</RefYear>
        <RefJournal>Acad Med</RefJournal>
        <RefPage>70-81</RefPage>
        <RefTotal>Ericsson KA. Deliberate practice and the acquisition and maintenance of expert performance in medicine and related domains. Acad Med. 2004;79(10):70-81. DOI: 10.1097&#47;00001888-200410001-00022</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1097&#47;00001888-200410001-00022</RefLink>
      </Reference>
      <Reference refNo="28">
        <RefAuthor>Grossman P</RefAuthor>
        <RefAuthor>Compton C</RefAuthor>
        <RefAuthor>Igra D</RefAuthor>
        <RefAuthor>Ronfeldt M</RefAuthor>
        <RefAuthor>Shahan E</RefAuthor>
        <RefAuthor>Williamson P</RefAuthor>
        <RefTitle>Teaching practice: A cross-professional perspective</RefTitle>
        <RefYear>2009</RefYear>
        <RefJournal>Teach Coll Record</RefJournal>
        <RefPage>2055-2100</RefPage>
        <RefTotal>Grossman P, Compton C, Igra D, Ronfeldt M, Shahan E, Williamson P. Teaching practice: A cross-professional perspective. Teach Coll Record. 2009;111(9):2055-2100.</RefTotal>
      </Reference>
      <Reference refNo="29">
        <RefAuthor>Cook DA</RefAuthor>
        <RefAuthor>Hamstra SJ</RefAuthor>
        <RefAuthor>Brydges R</RefAuthor>
        <RefAuthor>Zendejas B</RefAuthor>
        <RefAuthor>Szostek JH</RefAuthor>
        <RefAuthor>Wang AT</RefAuthor>
        <RefAuthor>Erwin PJ</RefAuthor>
        <RefAuthor>Hatala R</RefAuthor>
        <RefTitle>Comparative effectiveness of instructional design features in simulation-based education: Systematic review and meta-analysis</RefTitle>
        <RefYear>2013</RefYear>
        <RefJournal>Med Teach</RefJournal>
        <RefPage>e867-e898</RefPage>
        <RefTotal>Cook DA, Hamstra SJ, Brydges R, Zendejas B, Szostek JH, Wang AT, Erwin PJ, Hatala R. Comparative effectiveness of instructional design features in simulation-based education: Systematic review and meta-analysis. Med Teach. 2013;35(1):e867-e898. DOI: 10.3109&#47;0142159X.2012.714886</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.3109&#47;0142159X.2012.714886</RefLink>
      </Reference>
      <Reference refNo="30">
        <RefAuthor>Mamede S</RefAuthor>
        <RefAuthor>van Gog T</RefAuthor>
        <RefAuthor>Sampaio AM</RefAuthor>
        <RefAuthor>Delbone de Faria RM</RefAuthor>
        <RefAuthor>Maria JP</RefAuthor>
        <RefAuthor>Schmidt HG</RefAuthor>
        <RefTitle>How can students&#39; diagnostic competence benefit most from practice with clinical cases&#63; The effect of structured reflection on future diagnosis of the same and novel diseases</RefTitle>
        <RefYear>2014</RefYear>
        <RefJournal>Acad Med</RefJournal>
        <RefPage>121-127</RefPage>
        <RefTotal>Mamede S, van Gog T, Sampaio AM, Delbone de Faria RM, Maria JP, Schmidt HG. How can students&#39; diagnostic competence benefit most from practice with clinical cases&#63; The effect of structured reflection on future diagnosis of the same and novel diseases. Acad Med. 2014;89(1):121-127. DOI: 10.1097&#47;ACM.0000000000000076</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1097&#47;ACM.0000000000000076</RefLink>
      </Reference>
      <Reference refNo="31">
        <RefAuthor>Schuwirth LW</RefAuthor>
        <RefAuthor>Van der Vleuten CP</RefAuthor>
        <RefTitle>The use of clinical simulations in assessment</RefTitle>
        <RefYear>2003</RefYear>
        <RefJournal>Med Educ</RefJournal>
        <RefPage>65-71</RefPage>
        <RefTotal>Schuwirth LW, Van der Vleuten CP. The use of clinical simulations in assessment. Med Educ. 2003;37:65-71. DOI: 10.1046&#47;j.1365-2923.37.s1.8.x</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1046&#47;j.1365-2923.37.s1.8.x</RefLink>
      </Reference>
      <Reference refNo="32">
        <RefAuthor>Siebeck M</RefAuthor>
        <RefAuthor>Schwald B</RefAuthor>
        <RefAuthor>Frey C</RefAuthor>
        <RefAuthor>R&#246;ding S</RefAuthor>
        <RefAuthor>Stegmann K</RefAuthor>
        <RefAuthor>Fischer F</RefAuthor>
        <RefTitle>Teaching the rectal examination with simulations: Effects on knowledge acquisition and inhibition</RefTitle>
        <RefYear>2011</RefYear>
        <RefJournal>Med Educ</RefJournal>
        <RefPage>1025-1031</RefPage>
        <RefTotal>Siebeck M, Schwald B, Frey C, R&#246;ding S, Stegmann K, Fischer F. Teaching the rectal examination with simulations: Effects on knowledge acquisition and inhibition. Med Educ. 2011;45:1025-1031. DOI: 10.1111&#47;j.1365-2923.2011.04005.x</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1111&#47;j.1365-2923.2011.04005.x</RefLink>
      </Reference>
      <Reference refNo="33">
        <RefAuthor>C&#246;mert M</RefAuthor>
        <RefAuthor>Zill JM</RefAuthor>
        <RefAuthor>Christalle E</RefAuthor>
        <RefAuthor>Dirmaier J</RefAuthor>
        <RefAuthor>H&#228;rter M</RefAuthor>
        <RefAuthor>Scholl I</RefAuthor>
        <RefTitle>Assessing communication skills of medical students in objective structured clinical examinations (OSCE) - A systematic review of rating scales</RefTitle>
        <RefYear>2016</RefYear>
        <RefJournal>PloS one</RefJournal>
        <RefPage>e0152717</RefPage>
        <RefTotal>C&#246;mert M, Zill JM, Christalle E, Dirmaier J, H&#228;rter M, Scholl I. Assessing communication skills of medical students in objective structured clinical examinations (OSCE) - A systematic review of rating scales. PloS one. 2016;11(3):e0152717. DOI: 10.1371&#47;journal.pone.0152717</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1371&#47;journal.pone.0152717</RefLink>
      </Reference>
      <Reference refNo="34">
        <RefAuthor>Helfer RE</RefAuthor>
        <RefAuthor>Slater CH</RefAuthor>
        <RefAuthor>Goltz L</RefAuthor>
        <RefTitle>Measuring the process of solving clinical diagnostic problems</RefTitle>
        <RefYear>1971</RefYear>
        <RefJournal>Med Educ</RefJournal>
        <RefPage>48-52</RefPage>
        <RefTotal>Helfer RE, Slater CH, Goltz L. Measuring the process of solving clinical diagnostic problems. Med Educ. 1971;5(1):48-52. DOI: 10.1111&#47;j.1365-2923.1971.tb02150.x</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1111&#47;j.1365-2923.1971.tb02150.x</RefLink>
      </Reference>
      <Reference refNo="35">
        <RefAuthor>Shavelson RJ</RefAuthor>
        <RefTitle>Assessing business-planning competence using the Collegiate Learning Assessment as a prototype</RefTitle>
        <RefYear>2012</RefYear>
        <RefJournal>Emp Res Vocat Educ Train</RefJournal>
        <RefPage>77-90</RefPage>
        <RefTotal>Shavelson RJ. Assessing business-planning competence using the Collegiate Learning Assessment as a prototype. Emp Res Vocat Educ Train. 2012;4(1):77-90.</RefTotal>
      </Reference>
      <Reference refNo="36">
        <RefAuthor>Cook DA</RefAuthor>
        <RefAuthor>Brydges R</RefAuthor>
        <RefAuthor>Zendejas B</RefAuthor>
        <RefAuthor>Hamstra SJ</RefAuthor>
        <RefAuthor>Hatala R</RefAuthor>
        <RefTitle>Technology-enhanced simulation to assess health professionals: A systematic review of validity evidence, research methods, and reporting quality</RefTitle>
        <RefYear>2013</RefYear>
        <RefJournal>Acad Med</RefJournal>
        <RefPage>872-883</RefPage>
        <RefTotal>Cook DA, Brydges R, Zendejas B, Hamstra SJ, Hatala R. Technology-enhanced simulation to assess health professionals: A systematic review of validity evidence, research methods, and reporting quality. Acad Med. 2013;88(6):872-883. DOI: 10.1097&#47;ACM.0b013e31828ffdcf</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1097&#47;ACM.0b013e31828ffdcf</RefLink>
      </Reference>
      <Reference refNo="37">
        <RefAuthor>Cook DA</RefAuthor>
        <RefAuthor>Brydges R</RefAuthor>
        <RefAuthor>Ginsburg S</RefAuthor>
        <RefAuthor>Hatala R</RefAuthor>
        <RefTitle>A contemporary approach to validity arguments: A practical guide to Kane&#39;s framework</RefTitle>
        <RefYear>2015</RefYear>
        <RefJournal>Med Educ</RefJournal>
        <RefPage>560-575</RefPage>
        <RefTotal>Cook DA, Brydges R, Ginsburg S, Hatala R. A contemporary approach to validity arguments: A practical guide to Kane&#39;s framework. Med Educ. 2015;49(6):560-575. DOI: 10.1111&#47;medu.12678</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1111&#47;medu.12678</RefLink>
      </Reference>
      <Reference refNo="38">
        <RefAuthor>Kane MT</RefAuthor>
        <RefTitle>Validating the interpretations and uses of test scores</RefTitle>
        <RefYear>2013</RefYear>
        <RefJournal>J Educ Measure</RefJournal>
        <RefPage>1-73</RefPage>
        <RefTotal>Kane MT. Validating the interpretations and uses of test scores. J Educ Measure. 2013;50(1):1-73. DOI: 10.1111&#47;jedm.12000</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1111&#47;jedm.12000</RefLink>
      </Reference>
      <Reference refNo="39">
        <RefAuthor>Sweller J</RefAuthor>
        <RefTitle>Cognitive load theory, learning difficulty, and instructional design</RefTitle>
        <RefYear>1994</RefYear>
        <RefJournal>Learn Instruct</RefJournal>
        <RefPage>295-312</RefPage>
        <RefTotal>Sweller J. Cognitive load theory, learning difficulty, and instructional design. Learn Instruct. 1994;4(4):295-312. DOI: 10.1016&#47;0959-4752(94)90003-5</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1016&#47;0959-4752(94)90003-5</RefLink>
      </Reference>
      <Reference refNo="40">
        <RefAuthor>Laugwitz B</RefAuthor>
        <RefAuthor>Held T</RefAuthor>
        <RefAuthor>Schrepp M</RefAuthor>
        <RefTitle>Construction and Evaluation of a User Experience Questionnaire</RefTitle>
        <RefYear>2008</RefYear>
        <RefBookTitle>HCI and usability for education and work. Lecture Notes in Computer Science</RefBookTitle>
        <RefPage></RefPage>
        <RefTotal>Laugwitz B, Held T, Schrepp M. Construction and Evaluation of a User Experience Questionnaire. In: Holzinger A, editor. HCI and usability for education and work. Lecture Notes in Computer Science. Berlin; Heidelberg: Springer; 2008. DOI: 10.1007&#47;978-3-540-89350-9&#95;6</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1007&#47;978-3-540-89350-9&#95;6</RefLink>
      </Reference>
      <Reference refNo="41">
        <RefAuthor>Schubert T</RefAuthor>
        <RefAuthor>Friedmann F</RefAuthor>
        <RefAuthor>Regenbrecht H</RefAuthor>
        <RefTitle>The experience of presence: Factor analytic insights</RefTitle>
        <RefYear>2001</RefYear>
        <RefJournal>Presence</RefJournal>
        <RefPage>266-281</RefPage>
        <RefTotal>Schubert T, Friedmann F, Regenbrecht H. The experience of presence: Factor analytic insights. Presence. 2001;10(3):266-281. DOI: 10.1162&#47;105474601300343603</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1162&#47;105474601300343603</RefLink>
      </Reference>
      <Reference refNo="42">
        <RefAuthor>Opfermann M</RefAuthor>
        <RefTitle></RefTitle>
        <RefYear>2008</RefYear>
        <RefBookTitle>There&#39;s more to it than instructional design: The role of individual learner characteristics for hypermedia learning</RefBookTitle>
        <RefPage></RefPage>
        <RefTotal>Opfermann M. There&#39;s more to it than instructional design: The role of individual learner characteristics for hypermedia learning. Berlin, GER: Logos-Verlag; 2008.</RefTotal>
      </Reference>
      <Reference refNo="43">
        <RefAuthor>Charlin B</RefAuthor>
        <RefAuthor>Boshuizen HPA</RefAuthor>
        <RefAuthor>Custers EJ</RefAuthor>
        <RefAuthor>Feltovich PJ</RefAuthor>
        <RefTitle>Scripts and clinical reasoning</RefTitle>
        <RefYear>2007</RefYear>
        <RefJournal>Med Educ</RefJournal>
        <RefPage>1178-1184</RefPage>
        <RefTotal>Charlin B, Boshuizen HPA, Custers EJ, Feltovich PJ. Scripts and clinical reasoning. Med Educ. 2007;41(12):1178-1184. DOI: 10.1111&#47;j.1365-2923.2007.02924.x</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1111&#47;j.1365-2923.2007.02924.x</RefLink>
      </Reference>
      <Reference refNo="44">
        <RefAuthor>Kozlov MD</RefAuthor>
        <RefAuthor>Gro&#223;e CS</RefAuthor>
        <RefTitle>Online collaborative learning in dyads: Effects of knowledge distribution and awareness</RefTitle>
        <RefYear>2016</RefYear>
        <RefJournal>Comput Hum Behav</RefJournal>
        <RefPage>389-401</RefPage>
        <RefTotal>Kozlov MD, Gro&#223;e CS. Online collaborative learning in dyads: Effects of knowledge distribution and awareness. Comput Hum Behav. 2016;59:389-401. DOI: 10.1016&#47;j.chb.2016.01.043</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1016&#47;j.chb.2016.01.043</RefLink>
      </Reference>
      <Reference refNo="45">
        <RefAuthor>Cook DA</RefAuthor>
        <RefTitle>Much ado about differences: Why expert-novice comparisons add little to the validity argument</RefTitle>
        <RefYear>2015</RefYear>
        <RefJournal>Adv Health Sci Educ Theory Pract</RefJournal>
        <RefPage>829-834</RefPage>
        <RefTotal>Cook DA. Much ado about differences: Why expert-novice comparisons add little to the validity argument. Adv Health Sci Educ Theory Pract. 2015;20(3):829-834. DOI: 10.1007&#47;s10459-014-9551-3</RefTotal>
        <RefLink>https:&#47;&#47;doi.org&#47;10.1007&#47;s10459-014-9551-3</RefLink>
      </Reference>
    </References>
    <Media>
      <Tables>
        <Table format="png">
          <MediaNo>1</MediaNo>
          <MediaID>1</MediaID>
          <Caption><Pgraph><Mark1>Table 1: Inferences, assumptions, and warrants for the development of the argument of validity</Mark1> </Pgraph></Caption>
        </Table>
        <Table format="png">
          <MediaNo>2</MediaNo>
          <MediaID>2</MediaID>
          <Caption><Pgraph><Mark1>Table 2: Internal consistencies for all instruments</Mark1></Pgraph></Caption>
        </Table>
        <Table format="png">
          <MediaNo>3</MediaNo>
          <MediaID>3</MediaID>
          <Caption><Pgraph><Mark1>Table 3: Means and standard deviations per variable and group. </Mark1></Pgraph></Caption>
        </Table>
        <NoOfTables>3</NoOfTables>
      </Tables>
      <Figures>
        <Figure format="png" height="354" width="1116">
          <MediaNo>1</MediaNo>
          <MediaID>1</MediaID>
          <Caption><Pgraph><Mark1>Figure 1: Model for collaborative diagnostic reasoning (CDR) adapted from Radkowitsch et al. &#91;12&#93;</Mark1></Pgraph></Caption>
        </Figure>
        <Figure format="png" height="383" width="749">
          <MediaNo>2</MediaNo>
          <MediaID>2</MediaID>
          <Caption><Pgraph><Mark1>Figure 2: Schematic representation of the simulation</Mark1></Pgraph></Caption>
        </Figure>
        <Figure format="png" height="578" width="747">
          <MediaNo>3</MediaNo>
          <MediaID>3</MediaID>
          <Caption><Pgraph><Mark1>Figure 3: Differences of prior knowledge groups with respect to a) diagnostic accuracy, b) diagnostic efficiency, c) information sharing skill, and d) intrinsic cognitive load. Error bars indicate 95&#37; Confidence Intervals.</Mark1></Pgraph></Caption>
        </Figure>
        <NoOfPictures>3</NoOfPictures>
      </Figures>
      <InlineFigures>
        <NoOfPictures>0</NoOfPictures>
      </InlineFigures>
      <Attachments>
        <NoOfAttachments>0</NoOfAttachments>
      </Attachments>
    </Media>
  </OrigData>
</GmsArticle>