From 80cb9564ce80401fc77eac631249af8780f0f1d6 Mon Sep 17 00:00:00 2001 From: "LAPTOP-K24GVT55\\Karin" <karin.schoene@tu-dresden.de> Date: Thu, 30 Dec 2021 15:58:24 +0100 Subject: [PATCH] update for EN: text and translation minor text corrections in DE and IT documents --- A_translations.php | 2 +- F_footer.php | 2 +- de/download.php | 4 +- de/research.php | 18 +-- de/teacher.php | 30 ++--- en/download.php | 115 ++++++----------- en/help-annis-glossary.php | 2 +- en/mcorpus.php | 2 +- en/research.php | 203 ++++++++++-------------------- en/teacher.php | 252 ++++++++++++++++++++----------------- it/start.php | 2 +- 11 files changed, 270 insertions(+), 362 deletions(-) diff --git a/A_translations.php b/A_translations.php index 4be71c2..15ab05d 100644 --- a/A_translations.php +++ b/A_translations.php @@ -1,6 +1,6 @@ <?php - $o_menu = array( 'help_search' => array('en' => 'Search help', 'de' => 'Hilfe zur Suche', 'cz' => 'Hilfe xxx', 'it' => 'Hilfe xxx'), 'home' => array('en' => 'Home | Search', 'de' => 'Start | Suche', 'cz' => 'Úvod | VyhledávánÃ', 'it' => 'Home | Ricerca'), 'about' => array('en' => 'About MERLIN', 'de' => 'über MERLIN', 'cz' => 'O projektu MERLIN', 'it' => 'Su MERLIN'), 'help' => array('en' => 'Help', 'de' => 'Hilfe', 'cz' => 'Nápoveda', 'it' => 'Aiuto'), 'm_learning' => array('en' => 'Using MERLIN ', 'de' => 'MERLIN in der Praxis', 'cz' => 'MERLIN: v praxi', 'it' => 'Usare MERLIN'), 'm_research' => array('en' => 'MERLIN for research', 'de' => 'Forschen mit MERLIN', 'cz' => 'MERLIN: výzkum', 'it' => 'MERLIN per la ricerca'), 'm_corpus' => array('en' => 'MERLIN corpus', 'de' => 'MERLIN: Korpus', 'cz' => 'MERLIN: korpus', 'it' => 'Corpus MERLIN'), 'm_annotations' => array('en' => 'MERLIN annotations', 'de' => 'MERLIN: Annotationen', 'cz' => 'MERLIN: anotace', 'it' => 'Annotazioni MERLIN'), 'documentation' => array('en' => 'Documentation', 'de' => 'Dokumentation', 'cz' => 'Dokumentace', 'it' => 'Documentazione'), 'downloads' => array('en' => 'Download 
whole corpus', 'de' => 'Download: MERLIN-Korpus', 'cz' => 'Ke staženÃ', 'it' => 'Scarica il corpus'), 'simple' => array('en' => 'Simple search', 'de' => 'einfache Suche', 'cz' => 'Jednoduché vyhledávánÃ', 'it' => 'Ricerca semplice'), 'advanced' => array('en' => 'Advanced search', 'de' => 'erweiterte Suche', 'cz' => 'Pokrocilé vyhledávánÃ', 'it' => 'Ricerca avanzata'), 'document' => array('en' => 'Define a subcorpus', 'de' => 'Subkorpus festlegen', 'cz' => 'Vytvorit subkorpus', 'it' => 'Crea un sottocorpus'), 'learner' => array('en' => 'Statistics', 'de' => 'Statistik', 'cz' => 'Statistika', 'it' => 'Statistiche'), 'english' => array('en' => 'English', 'de' => 'Englisch', 'cz' => 'anglicky', 'it' => 'inglese'), 'german' => array('en' => 'German', 'de' => 'Deutsch', 'cz' => 'nemecky', 'it' => 'tedesco'), 'czech' => array('en' => 'Czech', 'de' => 'Tschechisch', 'cz' => 'cesky', 'it' => 'ceco'), 'italian' => array('en' => 'Italian', 'de' => 'Italienisch', 'cz' => 'italsky', 'it' => 'italiano'), 'team' => array('en' => 'Team', 'de' => 'Projektteam', 'cz' => 'Náš tým', 'it' => 'Team'), 'contact' => array('en' => 'Contact', 'de' => 'Kontakt', 'cz' => 'Kontakt', 'it' => 'Contatti'), 'disclaimer' => array('en' => 'Disclaimer', 'de' => 'Impressum', 'cz' => 'Impressum', 'it' => 'Disclaimer')); + $o_menu = array( 'help_search' => array('en' => 'Search help', 'de' => 'Hilfe zur Suche', 'cz' => 'Search help', 'it' => 'Aiuto'), 'home' => array('en' => 'Home | Search', 'de' => 'Start | Suche', 'cz' => 'Úvod | VyhledávánÃ', 'it' => 'Home | Ricerca'), 'about' => array('en' => 'About MERLIN', 'de' => 'über MERLIN', 'cz' => 'O projektu MERLIN', 'it' => 'Su MERLIN'), 'help' => array('en' => 'Help', 'de' => 'Hilfe', 'cz' => 'Nápoveda', 'it' => 'Aiuto'), 'm_learning' => array('en' => 'Using MERLIN ', 'de' => 'MERLIN in der Praxis', 'cz' => 'MERLIN: v praxi', 'it' => 'Usare MERLIN'), 'm_research' => array('en' => 'MERLIN for research', 'de' => 'Forschen mit MERLIN', 'cz' => 'MERLIN: 
výzkum', 'it' => 'MERLIN per la ricerca'), 'm_corpus' => array('en' => 'MERLIN corpus', 'de' => 'MERLIN: Korpus', 'cz' => 'MERLIN: korpus', 'it' => 'Corpus MERLIN'), 'm_annotations' => array('en' => 'MERLIN annotations', 'de' => 'MERLIN: Annotationen', 'cz' => 'MERLIN: anotace', 'it' => 'Annotazioni MERLIN'), 'documentation' => array('en' => 'Documentation', 'de' => 'Dokumentation', 'cz' => 'Dokumentace', 'it' => 'Documentazione'), 'downloads' => array('en' => 'Download whole corpus', 'de' => 'Download: MERLIN-Korpus', 'cz' => 'Ke staženÃ', 'it' => 'Scarica il corpus'), 'simple' => array('en' => 'Simple search', 'de' => 'einfache Suche', 'cz' => 'Jednoduché vyhledávánÃ', 'it' => 'Ricerca semplice'), 'advanced' => array('en' => 'Advanced search', 'de' => 'erweiterte Suche', 'cz' => 'Pokrocilé vyhledávánÃ', 'it' => 'Ricerca avanzata'), 'document' => array('en' => 'Define a subcorpus', 'de' => 'Subkorpus festlegen', 'cz' => 'Vytvorit subkorpus', 'it' => 'Crea un sottocorpus'), 'learner' => array('en' => 'Statistics', 'de' => 'Statistik', 'cz' => 'Statistika', 'it' => 'Statistiche'), 'english' => array('en' => 'English', 'de' => 'Englisch', 'cz' => 'anglicky', 'it' => 'inglese'), 'german' => array('en' => 'German', 'de' => 'Deutsch', 'cz' => 'nemecky', 'it' => 'tedesco'), 'czech' => array('en' => 'Czech', 'de' => 'Tschechisch', 'cz' => 'cesky', 'it' => 'ceco'), 'italian' => array('en' => 'Italian', 'de' => 'Italienisch', 'cz' => 'italsky', 'it' => 'italiano'), 'team' => array('en' => 'Team', 'de' => 'Projektteam', 'cz' => 'Náš tým', 'it' => 'Team'), 'contact' => array('en' => 'Contact', 'de' => 'Kontakt', 'cz' => 'Kontakt', 'it' => 'Contatti'), 'disclaimer' => array('en' => 'Disclaimer', 'de' => 'Impressum', 'cz' => 'Impressum', 'it' => 'Disclaimer')); $outdated = array('search_expl' => array( 'en' => 'e.g. to explore the use of words that your students often struggle with', 'de' => 'z.B. 
um den Gebrauch von Wörtern zu erkunden, die Lernern häufig Probleme bereiten'),'compose' => array( 'en' => 'Compose search', 'de' => 'Suche erstellen'),'click_text' => array( 'en' => 'Click to see whole text', 'de' => 'Klicken Sie auf die Textstelle, um den gesamten Text zu sehen'),'text_ident' => array( 'en' => 'Text identification', 'de' => 'Text'),'search_expl_ds' => array( 'en' => 'e.g. to create your own subcorpus', 'de' => 'z.B. um ein eigenes Subkorpus zusammenzustellen'),'text_feat' => array( 'en' => 'Text features', 'de' => 'Informationen über den Text')); diff --git a/F_footer.php b/F_footer.php index f103bed..1161693 100644 --- a/F_footer.php +++ b/F_footer.php @@ -1,7 +1,7 @@ <!--FOOTER--> <div id="footerpart"> <div id="footer-link-box"> - last modified: 23 April 2018 | + last modified: 30 December 2021 | contact: info@merlin-platform.eu | <a href="#" onclick="document.forms['team'].submit();"><?php echo $trans['team'][$_SESSION['lang']]; ?></a> | diff --git a/de/download.php b/de/download.php index 915861e..e508b4c 100644 --- a/de/download.php +++ b/de/download.php @@ -45,8 +45,8 @@ require('F_mainsidebar.php'); <p> </p> <h2><a name="archiv"></a>Archiv</h2> <p><img src="img/document-pdf.png" alt="" width="16" height="16"> <a href="docs/MERLIN_user-manual-GER.pdf" target="_blank" class="reference">MERLIN Benutzerhandbuch</a> | Hintergrundinformationen zu Zielen und Methoden des Projekts, Dokumentation der ursprünglichen Suchfunktionalitäten der Website</p> -<p><img src="img/document-pdf.png" alt="" width="16" height="16"> <a href="docs/WP4_UserModelling_Part1.pdf" target="_blank" class="reference"><strong>Bericht zur Nutzerbefragung</strong> - Teil 1</a> | Bedarf an der Veranschaulichung der Referenzrahmenniveaus, Ermittlung relevanter L2-Merkmale </p> -<p><img src="img/document-pdf.png" alt="" width="16" height="16"> <a href="docs/WP4_UserModelling_Part2Technical_report.pdf" target="_blank" class="reference"><strong>Bericht zur 
Nutzerbefragung</strong> - Teil 2</a> | technische Voraussetzungen bei den Nutzern, gewünschter Such- und Interface-Funktionalitäten, Umgang mit Daten</p> +<p><img src="img/document-pdf.png" alt="" width="16" height="16"> <a href="docs/WP4_UserModelling_Part1.pdf" target="_blank" class="reference">Bericht zur Nutzerbefragung - Teil 1</a> | Bedarf an der Veranschaulichung der Referenzrahmenniveaus, Ermittlung relevanter L2-Merkmale </p> +<p><img src="img/document-pdf.png" alt="" width="16" height="16"> <a href="docs/WP4_UserModelling_Part2Technical_report.pdf" target="_blank" class="reference">Bericht zur Nutzerbefragung - Teil 2</a> | technische Voraussetzungen bei den Nutzern, gewünschter Such- und Interface-Funktionalitäten, Umgang mit Daten</p> <p> </p> <!--INSERT END--> </div> diff --git a/de/research.php b/de/research.php index 7534913..54fe82c 100644 --- a/de/research.php +++ b/de/research.php @@ -27,7 +27,7 @@ require('F_mainsidebar.php'); <div id="anchor21"></div> <h3><a href="#anchor21" onClick="toggle('#content21','#img21')"><img id="img21" src="img/toggle-expand.png"></a> 2.1 Transkriptionen</h3> <div id="content21" class="content"> -<p>Die im Original handgeschriebenen Lernertexte (L2-Texte) wurden mit Hilfe eines xml-basierten Editors (xml mind ©) in den Testinstitutionen telc und ÚJOP transkribiert. Dazu wurden <a href="C_download.php#corpus" target="_blank">Transkriptionsrichtlinien</a> entwickelt. Die Reliabilität der Transkriptionen wurde kontrolliert, indem zunächst 5% der Transkripte pro Niveaustufe von Projektverantwortlichen überprüft wurden. Wegen der hohen Fehlerzahl in dieser Stichprobe wurden letztendlich nahezu alle Texte einer strengen Nachprüfung unterzogen.</p> +<p>Die im Original handgeschriebenen Lernertexte (L2-Texte) wurden mit Hilfe eines xml-basierten Editors (xml mind ©) in den Testinstitutionen telc und ÚJOP transkribiert. Dazu wurden Transkriptionsrichtlinien entwickelt. 
Die Reliabilität der Transkriptionen wurde kontrolliert, indem zunächst 5% der Transkripte pro Niveaustufe von Projektverantwortlichen überprüft wurden. Wegen der hohen Fehlerzahl in dieser Stichprobe wurden letztendlich nahezu alle Texte einer strengen Nachprüfung unterzogen.</p> <p>Die Transkriptionsrichtlinien (Inline-Annotation) enthalten Tags für die Annotation von Textcharakteristika wie etwa unlesbare oder mehrdeutige Textabschnitte, fremdsprachliche Wörter, Emoticons, Bilder, Abschnitte, Wörter, die aus der Aufgabenstellung kopiert wurden, oder Grußformeln. Die Anonymisierung (Namen, Orte u.ä.) fand ebenfalls richtlinienbasiert während der Transkriptionsphase statt. </p> <div> @@ -43,7 +43,7 @@ require('F_mainsidebar.php'); </div> <div id="anchor231"></div> <h3><a name="annotations"></a>2.3 Annotationen</h3> -<h4><a href="#anchor231" onClick="toggle('#content231','#img231')"><img id="img231" src="img/toggle-expand.png"></a> 2.3.1 Manuelle Annotationen</h4> +<h4><a href="#anchor231" onClick="toggle('#content231','#img231')"><img id="img231" src="img/toggle-expand.png"></a> 2.3.1 Manuelle Annotationen im Gesamtkorpus</h4> <div id="content231" class="content"> <p><img src="img/annotations_GRAPHIC-layer_de1.png" width="530" height="195" alt="EA1"></p> <h5>Minimale Zielhypothesen / Zielhypothesen 1 (TH1)</h5> @@ -76,12 +76,12 @@ require('F_mainsidebar.php'); <p>MERLIN-Tags, die für Fehler stehen (was nicht bei allen der Fall ist), beinhalten i. d. R. die Dimension der 'Zielsprachenmodifikation' (engl. 'target language modification', vgl. DÃaz-Negrillo/Fernández-DomÃnguez 2006). Diese gibt Aufschluss über den Fehler-Typ (ein Element kann fälschlich weggelassen, geändert, hinzugefügt, umgestellt, mit einem anderen Element zusammengeführt oder von ihm getrennt worden sein). Genaue Informationen dazu ergeben sich ebenfalls aus dem <a href="C_download.php#annotations" target="_blank">Annotationsschema</a>. 
</p> </div> <div id="anchor232"></div> - <h4><a name="corecorpus"></a><a href="#anchor232" onClick="toggle('#content232','#img232')"><img id="img232" src="img/toggle-expand.png"></a> 2.3.2 Das MERLIN-Kernkorpus </h4> + <h4><a name="corecorpus"></a><a href="#anchor232" onClick="toggle('#content232','#img232')"><img id="img232" src="img/toggle-expand.png"></a> 2.3.2 Manuelle Annotationen im Kernkorpus </h4> <div id="content232" class="content"> <h5>Zusammensetzung des MERLIN-Kernkorpus</h5> <p>Für ein kleineres Pilotkorpus (das <strong>MERLIN</strong>-<strong>Kernkorpus</strong>) wurden neben der Grammatik und der Orthographie weitere sprachliche Bereiche berücksichtigt. Das Kernkorpus besteht aus Texten, die einen <a href="#reratings">fairen Durchschnitt </a> von A2 oder B2 erhalten haben (für Italienisch: A2 und B1/B1+). So können zwei Gruppen mit deutlich verschiedenem Kompetenzniveau einander gegenübergestellt werden. Es ist wichtig hervorzuheben, dass die <em>Bewertung</em> der Lernertexte nicht notwendigerweise dem Niveaus des <em>Tests</em> entspricht. </p> <p>Viele Lernende erreichten ein höheres als das Zielniveau des bewältigten Sprachtests, während andere niedriger eingestuft wurden, als sie erwartet haben mögen. Ein Extremfall ist das Italienische: Hier wurden nur zwei Texte mit B2 bewertet, während viel mehr Sprachlernende Tests auf B2-Niveau absolviert haben. Das MERLIN-Kernkorpus für das Italienische beinhaltet neben A2-Texten die 100 Texte, die auf der Rasch-Logitskala am höchsten lagen (siehe "<a href="C_download.php#corpus" target="_blank">technical report</a>"). </p> - <h5>Annotationen im MERLIN-Kernkorpus</h5> + <p><img src="img/annotations_GRAPHIC-layer_de2.png" width="531" height="202"></p> <h5>Das Kernkorpus: erweiterte Zielhypothesen/Zielhypothesen 2 (TH2) </h5> <p>Dies geht mit einer größeren Subjektivität einher und erschwert die Herstellung von Reliabilität. Die Zielhypothese 2 wurde daher von Zielhypothese 1 getrennt. 
Dieses Vorgehen wurde auch im Falko-Projekt gewählt, mit dem MERLIN eng kooperierte. Das Ziel der TH2 ist es, die Perspektive der <strong>Angemessenheit</strong> von L2-Texten zu erfassen (im Unterschied zu ihrer Korrektheit, die in TH1 untersucht wird). Sie zielt darauf ab, eine Version zu erstellen, die einer muttersprachlichen Äußerung möglichst nahekommt. Die TH2 ist somit eine Erweiterung der TH1. Auch hier galt die Maßgabe, die Texte möglichst wenig zu ändern. Die TH2 tangiert semantische und lexikalische Aspekte ebenso wie die Pragmatik und Soziolinguistik. Im Unterschied zur TH1 werden auch satzübergreifende, kontextbedingte Phänomene modifiziert.</p> @@ -119,9 +119,9 @@ involvieren zahlreiche notwendig subjektive Entscheidungen. Die EA2-Annotationen <div id="content234" class="content"> <p>Für die Aufbereitung der Lernertexte zur Verwendung auf der MERLIN-Plattform wurde eine Kombination aus manuellen und automatischen Annotationsverfahren verwendet. Da linguistische Annotationen manuell sehr zeitaufwändig und teuer sind, wurde auf schon bestehende automatische Annotationstools zurückgegriffen. So konnte eine große Bandbreite linguistischer Annotationen verfügbar gemacht werden. 
Man muss dabei bedenken, dass besonders die automatische Annotation von Lernersprache eine große Herausforderung darstellt, da Lernersprache auf allen Ebenen der linguistischen Analyse, von der Orthographie bishin zur Semantik, deutlich von der Zielsprache abweicht.</p> <h5>Die folgenden Tools wurden für alle drei Zielsprachen in MERLIN verwendet:</h5> -<p>Für die Tokenisierung der Texte wurde der <a href="http://alias-i.com/lingpipe/docs/api/com/aliasi/tokenizer/IndoEuropeanTokenizerFactory.htm" target="_blank" class="reference">tokenizer for Indo-European +<p>Für die Tokenisierung der Texte wurde der <a href="http://www.alias-i.com/lingpipe/docs/api/com/aliasi/tokenizer/IndoEuropeanTokenizerFactory.html" target="_blank" class="reference">tokenizer for Indo-European languages</a> von LingPipe verwendet. Die Ergebnisse wurden manuell korrigiert. <br> -Sätze (sentences) wurden mithilfe des <a href="https://opennlp.apache.org/documentation/1.5.3/manual/opennlp.html#tools.sentdetect" target="_blank" class="reference">OpenNLP sentence +Sätze (sentences) wurden mithilfe des <a href="https://opennlp.apache.org/docs/1.9.4/manual/opennlp.html#tools.sentdetect" target="_blank" class="reference">OpenNLP sentence segmenter</a> segmentiert.<br> Wiederholungen (repetitions) wurden mithilfe der <a href="https://code.google.com/p/saphre" target="_blank" class="reference">Saphre library</a> auf der Basis der automatischen Part-of-speech- und Lemma-Annotationen (siehe unten) ermittelt.</p> @@ -196,8 +196,8 @@ library</a> auf der Basis der automatischen Part-of-speech- und Lemma-Annotation <div id="anchor3"></div> <h2><a href="#anchor3" onClick="toggle('#content3','#img3')"><img id="img3" src="img/toggle-expand.png"></a> 3. 
MERLIN in der Forschung </h2> <div id="content3" class="content"> -<p>Das Hauptziel des MERLIN-Projekts ist nicht forschungsorientiert: Die Plattform wurde für die Praxis entwickelt, um zur dort dringend benötigten empirischen Veranschaulichung bewerteter GER-Niveaustufen für Deutsch, Tschechisch und Italienisch beizutragen. Zunehmend entstehen Initiativen (wie etwa <a href="http://www.slate.eu.org/" target="_blank" class="reference">SLATE</a>), die auf den GER bezogene authentische Lernersprache zusammentragen. Einige stehen im Zusammenhang mit der <em>Reference Level Descriptions</em>-Initiative (siehe <a href="http://www.coe.int/t/dg4/linguistic/cadre1_en.asp" title="CoE website for RLD" target="_blank" class="reference">Website des Europarts für "Reference Level Descriptions"</a>), wie z.B. das umfangreiche <a href="http://www.englishprofile.org/" target="_blank" class="reference">English Profile Project</a>, oder ASK (für Norwegisch, Carlsen 2013) und das Profilo della lingua italiana (Spinelli/Parizzi 2010). Die RLDs sind auf einzelne Sprachen bezogene Konkretisierungen der Referenzrahmen-Niveaustufen.<br> -Aus solchen Korpora können typische Kennzeichnen bewerteter GER-Niveaustufen extrahiert werden (manchmal als "criterial features" bezeichnet, vgl. Hawkins/FilipovÃc 2012). Dieses Vorgehen vertieft das Verständnis der Bedeutung GER-bezogener Bewertungen und hilft dabei, den Gebrauch der Referenzrahmenskalen mit einer solideren, empirisch abgesicherten Basis zu unterlegen.</p> +<p>Das Hauptziel des MERLIN-Projekts ist nicht forschungsorientiert: Die Plattform wurde für die Praxis entwickelt, um zur dort dringend benötigten empirischen Veranschaulichung bewerteter GER-Niveaustufen für Deutsch, Tschechisch und Italienisch beizutragen. Zunehmend entstehen Initiativen, die auf den GER bezogene authentische Lernersprache zusammentragen. 
Einige stehen im Zusammenhang mit der <em>Reference Level Descriptions</em>-Initiative (siehe <a href="http://www.coe.int/t/dg4/linguistic/cadre1_en.asp" title="CoE website for RLD" target="_blank" class="reference">Website des Europarts für "Reference Level Descriptions"</a>), wie z.B. das umfangreiche <a href="http://www.englishprofile.org/" target="_blank" class="reference">English Profile Project</a>, oder <em>ASK</em> (für Norwegisch, Carlsen 2013) und das <em>Profilo della lingua italiana</em> (Spinelli/Parizzi 2010). Die RLDs sind auf einzelne Sprachen bezogene Konkretisierungen der Referenzrahmen-Niveaustufen.<br> +Aus solchen Korpora können typische Kennzeichen bewerteter GER-Niveaustufen extrahiert werden (manchmal als <em>criterial features</em> bezeichnet, vgl. Hawkins/FilipovÃc 2012). Dieses Vorgehen vertieft das Verständnis der Bedeutung GER-bezogener Bewertungen und hilft dabei, den Gebrauch der Referenzrahmenskalen mit einer solideren, empirisch abgesicherten Basis zu unterlegen.</p> <p>MERLIN trägt zur empirisch basierten Erforschung der GER-Stufen für Deutsch, Italienisch und Tschechisch bei und unterscheidet sich von den meisten existierenden Projekten dadurch, dass alle Daten - inklusive Volltexten, Testaufgaben und Annotationen - vollständig und kostenlos online verfügbar sind.<br> Neben diesem praktischen Ziel ist MERLIN jedoch auch für Forschungszwecke relevant, und zwar aus verschiedenen Perspektiven:</p> </div> @@ -205,7 +205,7 @@ Aus solchen Korpora können typische Kennzeichnen bewerteter GER-Niveaustufen e <div id="anchor31"></div> <h3><a name="scale-valid"></a><a href="#anchor31" onClick="toggle('#content31','#img31')"><img id="img31" src="img/toggle-expand.png"></a> 3.1 Die Validierung von GER-Skalen mit MERLIN</h3> <div id="content31" class="content"> -<p>Die beispiellose Initiative des Europarats zur Skalierung der GER-Deskriptoren (Europarat 2001; North 2000; Schneider/North 2000) hat zu deutlichen Verbesserungen bezüglich der 
Standardisierung und Transparenz beim Sprachenlernen, -lehren und -testen geführt. Wichtige Entscheidungen über das Leben von Lerner/-innen werden auf Grundlage der GER-Niveaus getroffen. In vielerlei Hinsicht scheint es, als hätten die Skalen ein Eigenleben angenommen - häufig überschätzt und missverstanden, werden sie auf eine Art und Weise eingesetzt, für die sie nicht geschaffen wurden (North 2000).</p> +<p>Die beispiellose Initiative des Europarats zur Skalierung der GER-Deskriptoren (Europarat 2001; North 2000; Schneider/North 2000) hat zu deutlichen Verbesserungen bezüglich der Standardisierung und Transparenz beim Sprachenlernen, -lehren und -testen geführt. Wichtige Entscheidungen über das Leben von Lerner/-innen werden auf Grundlage der GER-Niveaus getroffen. In vielerlei Hinsicht scheint es, als hätten die Skalen ein Eigenleben entwickelt - häufig überschätzt und missverstanden, werden sie auf eine Art und Weise eingesetzt, für die sie nicht geschaffen wurden (North 2000).</p> <p>Ein zentraler, bislang nur unzureichend erforschter Aspekt betrifft die empirische Validität der GER-Skalen (Fulcher 2004; Hulstijn 2007): Wenn Skalen zur Beschreibung oder Bewertung von Lernersprache eingesetzt werden, müssen sie reflektieren, was die Lernenden tatsächlich tun (Alderson 1991). Trotzdem gibt es bislang nahezu keine Studien, die untersuchen, inwiefern Lernersprache mit GER-Skalen überhaupt beschreibbar ist (Wisniewski 2014). MERLIN bietet die Möglichkeit, die Verbindung ausgewählter Deskriptoren des Referenzrahmens (wie z.B. "Umschreibungen" oder "inhaltliche Sprünge", die operationalisiert und annotiert wurden; siehe <a href="C_download.php#annotations" target="_blank">MERLIN-Annotationsschema</a>) zu Lernersprache zu analysieren, ohne Beurteilungen zu Rate ziehen zu müssen. 
</div> <div id="anchor32"></div> diff --git a/de/teacher.php b/de/teacher.php index b7758ce..8fb3ed4 100644 --- a/de/teacher.php +++ b/de/teacher.php @@ -22,7 +22,7 @@ require('F_mainsidebar.php'); diskutieren Stärken und Schwächen eines Beispiel-Textes aus dem MERLIN-Korpus und vergleichen ihn mit der Zielhypothese. </li> <li dir="ltr" aria-level="1"> bearbeiten selbst eine der <a href="C_mcorpus.php#tasks" target="_blank">MERLIN-Aufgaben</a> und vergleichen ihre eigene Produktion mit den MERLIN-Texten. </li> - <li>bewerten gegenseitig ihre Text-Produktionen mit Hilfe des <a href="C_download.php#corpus" target="_blank" class="a.reference">MERLIN-Bewertungsraster</a> (ab GER-Niveau B1).<br /> + <li>bewerten gegenseitig ihre Text-Produktionen mit Hilfe des <a href="C_download.php#corpus" target="_blank">MERLIN-Bewertungsraster</a> (ab GER-Niveau B1).<br /> </li> </ul> <p dir="ltr"><strong>Und so geht’s:</strong></p> @@ -31,7 +31,7 @@ require('F_mainsidebar.php'); Laden Sie sich die passende <strong>↘</strong> <a href="C_mcorpus.php#tasks" target="_blank">Aufgabenstellung</a> herunter. </li> <li dir="ltr" aria-level="1"> Laden Sie sich das <a href="https://clarin.eurac.edu/repository/xmlui/bitstream/handle/20.500.12124/6/merlin-text-v1.1.zip" target="_blank" class="a.reference">Merlin-Korpus herunter</a> und wählen Sie <em><strong> ↘ meta_ltext_TH</strong></em> (Lernertext mit Zielhypothesen). 
</li> - <li>Suchen Sie gezielt nach Texten mit dieser Aufgabenstellung [<em><strong>↘</strong></em> <em><strong>Filtern der Texte mit dem Datei-Manager</strong></em>].</li> + <li>Suchen Sie gezielt nach Texten mit dieser Aufgabenstellung [<em><strong>↘Startseite↘</strong></em> <strong><em>Filtern der Texte mit dem Datei-Manager</em></strong>]<em><strong>.</strong></em></li> </ol> </div> <div id="anchor2"></div> @@ -45,8 +45,8 @@ require('F_mainsidebar.php'); </ul> <p dir="ltr"><strong>Und so geht’s:</strong></p> <ol> - <li dir="ltr" aria-level="1"> Laden Sie sich das <a href="https://clarin.eurac.edu/repository/xmlui/bitstream/handle/20.500.12124/6/merlin-text-v1.1.zip" target="_blank" class="a.reference">Merlin-Korpus herunter </a>und wählen Sie <strong>↘</strong><strong><em> meta_ltext_TH</em></strong> (Lernertext mit Zielhypothesen).</li> - <li>Suchen Sie nun in Ihrem <a href="\start.php#anchor3" target="_blank">Datei-Manager</a> gezielt Texte auf einem bestimmten Niveau, z. B. B1 und mit einer bestimmten Muttersprache, z. B. Russisch. </li> + <li dir="ltr" aria-level="1"> Laden Sie sich das <a href="https://clarin.eurac.edu/repository/xmlui/bitstream/handle/20.500.12124/6/merlin-text-v1.1.zip" target="_blank">Merlin-Korpus herunter </a>und wählen Sie <strong>↘</strong><strong><em> meta_ltext_TH</em></strong> (Lernertext mit Zielhypothesen).</li> + <li>Suchen Sie nun in Ihrem Datei-Manager gezielt Texte auf einem bestimmten Niveau, z. B. B1 und mit einer bestimmten Muttersprache, z. B. Russisch [<em><strong>↘Startseite↘</strong></em> <strong><em>Filtern der Texte mit dem Datei-Manager</em></strong>]. </li> </ol> <p><img src="img/hint_bulb.png" alt="hint bulb" /><span class="StilSmall"> Sie können die MERLIN-Daten auch nutzen, um die verschiedenen Aspekte der kommunikativen L2-Kompetenz, z. B. Wortschatzspektrum und -beherrschung, grammatische Korrektheit oder Kohärenz/Kohäsion, auf verschiedenen GER-Niveaus zu erkunden. 
Denn sprachliche Kompetenz ist meist in Profilen ausgeprägt: so mag ein Lerner beispielsweise erfolgreicher im Bereich der grammatischen Korrektheit sein, während sein Wortschatz noch beschränkt ist.</span></p> </div> @@ -82,9 +82,9 @@ require('F_mainsidebar.php'); </ul> <p dir="ltr"><strong>Und so geht’s:</strong></p> <ol> - <li dir="ltr" aria-level="1">Prüfen Sie in der <a href="#annotations" onclick="document.forms['glossary'].submit();"><?php echo $trans['help_search'][$_SESSION['lang']];?>Liste aller annotierten Merkmale der Lernersprache</a>, welches Problem oder Merkmal Sie interessiert.</li> + <li dir="ltr" aria-level="1">Prüfen Sie in der <a href="#annotations" onclick="document.forms['glossary'].submit();"><?php echo $trans['help_search'][$_SESSION['lang']];?></a> in Abschnitt 2 <strong>↘</strong> <strong><em>die Liste aller annotierten Merkmale der Lernersprache</em></strong>, welches Problem oder Merkmal Sie interessiert.</li> <li dir="ltr" aria-level="1">Öffnen Sie die <a href="https://merlin-platform.eu/annis/" target="_blank" class="a.reference">ANNIS-Suche</a>, um gezielt nach dem Merkmal zu suchen, z. B. <em>G_Refl_type</em>. Gehen Sie dabei so vor, wie unter <a href="#" onclick="document.forms['glossary'].submit();" class="a.reference"><?php echo $trans['help_search'][$_SESSION['lang']];?></a> beschrieben.</li> - <li dir="ltr" aria-level="1">Schauen Sie sich nun das Suchergebnis an. Unter <strong>↘</strong> <em><strong> full text</strong></em> können Sie sich den Fehler im Kontext ansehen und Sätze aus dem Lernertext kopieren.</li> + <li dir="ltr" aria-level="1">Schauen Sie sich nun das Suchergebnis an. 
Unter <strong>↘</strong> <em><strong> full text</strong></em> können Sie sich das Merkmal/den Fehler im Kontext ansehen und Sätze aus dem Lernertext kopieren.</li> </ol> </div> <h2 dir="ltr">2 MERLIN für Curriculums- und Lehrwerksplanung</h2> @@ -118,7 +118,7 @@ require('F_mainsidebar.php'); <p dir="ltr">Die meisten Sprachtests in Europa sind - mehr oder weniger solide - auf den GER bezogen. Zwar bietet der Europarat viel <a href="http://www.coe.int/t/dg4/linguistic/cadre1_en.asp" target="_blank" class="reference">nützliches Material</a>, um dies zu erleichtern, jedoch gibt es v. a. für andere Sprachen als das Englische bislang kaum empirische Daten, d. h. nur sehr wenige auf die GER-Niveaustufen bezogene Sprachproben, die den Prozess des GER-Bezugs in der Testentwicklung unterstützen könnten (einen Ãœberblick über die verfügbaren Referenzniveau-Beschreibungen findet sich auf den <a href="https://www.coe.int/en/web/common-european-framework-reference-languages/reference-level-descriptions-rlds-developed-so-far" target="_blank" class="reference">Webseiten des Europarates</a>; für Englisch siehe <a href="http://www.englishprofile.org/" target="_blank" class="reference">www.englishprofile.org</a>). 
</p> <p dir="ltr">Folgende Anwendungsmöglichkeiten ergeben sich unseres Erachtens für die MERLIN-Daten:</p> <ul> - <li dir="ltr" aria-level="1"> Verbesserung der Transparenz und Qualität der Konstruktion von GER-bezogenen Italienisch-, Deutsch- und Tschechischtests; </li> + <li dir="ltr" aria-level="1"> Verbesserung der Transparenz und Qualität der Konstruktion von GER-bezogenen Italienisch-, Deutsch- und Tschechischtests</li> <li dir="ltr" aria-level="1"> Benchmarking </li> <li dir="ltr" aria-level="1"> empirisch basierte Entwicklung von Testmaterialien</li> </ul> @@ -131,22 +131,24 @@ require('F_mainsidebar.php'); <p dir="ltr"><strong>Ziel</strong>: ein gemeinsames Verständnis der GER-Niveaus an der eigenen Einrichtung schaffen </p> <p dir="ltr"><strong>Szenarien</strong>: Ãœben Sie im Kollegium anhand der MERLIN-Aufgaben und -Texte die Bewertung schriftlicher L2-Produktionen und vergleichen Sie Ihre Bewertungen mit denen des MERLIN-Teams.</p> <ul> - <li dir="ltr" aria-level="1"> Filtern Sie dazu eine beliebige Stichprobe von Texten zu einer bestimmten Aufgabe, z. B. "Neujahrsbrief an einen Freund schreiben" und lassen Sie Ihre Kolleg/-innen die Texte mit Hilfe des <a href="https://merlin-platform.eu/C_download.php#corpus" target="_blank" class="a.reference">MERLIN-Bewertungsrasters </a>nachbewerten. </li> + <li dir="ltr" aria-level="1"> Filtern Sie dazu eine beliebige Stichprobe von Texten zu einer bestimmten Aufgabe, z. B. "Neujahrsbrief an einen Freund schreiben" und lassen Sie Ihre Kolleg/-innen die Texte mit Hilfe des <a href="C_download.php#corpus" target="_blank" class="a.reference">MERLIN-Bewertungsrasters </a>nachbewerten. 
</li> <li dir="ltr" aria-level="1"> Diskutieren Sie die Ergebnisse untereinander und vergleichen Sie sie am Ende mit den Bewertungen des MERLIN-Teams.</li> </ul> <p dir="ltr"><strong>Und so geht’s</strong>:</p> <p dir="ltr">So finden Sie Lernertexte zu einer bestimmten Aufgabenstellung.</p> <ol> - <li dir="ltr" aria-level="1"> Wählen Sie in der Ãœbersicht die passende Aufgabenstellung und laden Sie sie herunter.</li> - <li dir="ltr" aria-level="1"> Laden Sie sich das <a href="https://clarin.eurac.edu/repository/xmlui/bitstream/handle/20.500.12124/6/merlin-text-v1.1.zip" target="_blank" class="a.reference">Merlin-Korpus herunter</a> und wählen Sie <strong>↘</strong> <em><strong> </strong></em> <em><strong>meta_ltext_TH</strong></em> (Lernertext mit Zielhypothesen).</li> + <li dir="ltr" aria-level="1"> Wählen Sie in der <a href="C_mcorpus.php#tasks" target="_blank">Ãœbersicht</a> die passende Aufgabenstellung und laden Sie sie herunter.</li> + <li dir="ltr" aria-level="1"> Laden Sie sich das <a href="https://clarin.eurac.edu/repository/xmlui/bitstream/handle/20.500.12124/6/merlin-text-v1.1.zip" target="_blank" class="a.reference">Merlin-Korpus herunter</a> und wählen Sie <strong>↘</strong> <em><strong>meta_ltext_TH</strong></em> (Lernertext mit Zielhypothesen).</li> <li>Suchen Sie im Datei-Manager gezielt nach Texten mit dieser Aufgabenstellung [<em><strong>↘Startseite↘</strong></em> <strong><em>Filtern der Texte mit dem Datei-Manager</em></strong>].</li> </ol> </div> <h2>Links </h2> -<p>Europarat (2011). <a href="http://www.coe.int/t/dg4/linguistic/Cadre1_en.asp" target="_blank" class="reference">Gemeinsamer europäischer Referenzrahmen für Sprachen</a>. Europarat 2001</p> -<p><a href="https://rm.coe.int/cefr-companion-volume-with-new-descriptors-2018/1680787989 "target="_blank" class="reference">Begleitband (Companion volume)</a> zum Gemeinsamen europäischen Referenzrahmen für Sprachen. 
Europarat 2018</p> -<p><a href="http://www.coe.int/t/dg4/linguistic/Cadre1_en.asp" target="_blank" class="reference">Materialien des Europarats, unterstützend zum GeRS-Einsatz</a></p> -<p><a href="http://www.englishprofile.org/" target="_blank" class="reference">The English profile</a> +<p>Council of Europe (2001). <a href="https://www.coe.int/en/web/common-european-framework-reference-languages" target="_blank" class="reference">Common European Framework of Reference for Languages: Learning, Teaching, Assessment</a>. Council of Europe 2001<br /> + Council of Europe (2018). <a href="https://rm.coe.int/cefr-companion-volume-with-new-descriptors-2018/1680787989" target="_blank" class="reference"> Common European Framework of Reference for Languages: Learning, Teaching, Assessment. Companion volume with new descriptors</a>. Council of Europe 2018<br /> + <a href="https://www.coe.int/en/web/platform-plurilingual-intercultural-language-education/home" target="_blank" class="reference"> +Platform of resources and references for plurilingual and intercultural education +</a><br /> + <a href="http://www.englishprofile.org/" target="_blank" class="reference">The English profile</a> <!--INSERT END--> </p> </div> diff --git a/en/download.php b/en/download.php index ecb2ccc..7d767e5 100644 --- a/en/download.php +++ b/en/download.php @@ -8,87 +8,44 @@ require('F_mainsidebar.php'); <div id="content-menu3"> <!--INSERT--> <h1>MERLIN project documentation</h1> -<p><strong>Download MERLIN-related documents.</strong></p> -<p> </p> -<h2><a name="using"></a>Using MERLIN</h2> -<p><strong>MERLIN User manual</strong>: <a href="docs/MERLIN_user-manual-CZ.pdf" target="_blank">Czech</a> <img src="img/document-pdf.png" alt="pdf" width="16" height="16"> | <a href="docs/MERLIN_user-manual-GER.pdf" target="_blank">German</a> <img src="img/document-pdf.png" alt="pdf" width="16" height="16"> | <a href="docs/MERLIN_user-manual-ITA.pdf" target="_blank">Italian</a> <img src="img/document-pdf.png" 
alt="pdf" width="16" height="16"> | <a href="docs/MERLIN_user-manual-EN.pdf">English</a> <img src="img/document-pdf.png" alt="pdf" width="16" height="16"></p> -<p>Please download <strong>usage scenarios </strong>for the MERLIN platform in the section <a href="C_teacher.php" target="_blank" class="reference">Using MERLIN</a>.</p> -<p> </p> -<h2><a name="corpus"></a>Corpus: Tests and data preparation</h2> -<p><a href="docs/Transcription-guidelines_DE.pdf" target="_blank" class="reference">Transcription guidelines</a> <img src="img/document-pdf.png" alt="pdf" width="16" height="16"></p> -<p><strong>Complete test tasks </strong>including a task description are available for download in the section <a href="C_mcorpus.php#anchor3" target="_blank" class="reference">MERLIN corpus</a>.</p> -<p><strong>MERLIN rating grid</strong>: <a href="docs/MERLIN_Rating-Grid_CZE.pdf" target="_blank">Czech</a> <img src="img/document-pdf.png" width="16" height="16"> | <a href="docs/MERLIN_Rating-Grid_DE.pdf" target="_blank">German</a> <img src="img/document-pdf.png" alt="pdf" width="16" height="16"> | <a href="docs/MERLIN_Rating-Grid_ITA.pdf" target="_blank">Italian</a> <img src="img/document-pdf.png" alt="pdf" width="16" height="16"></p> -<p><a href="docs/MERLIN_Technical-report.pdf" target="_blank" class="reference">Technical report</a> <img src="img/document-pdf.png" alt="" width="16" height="16">: Report on the reliability and scale functionality of the MERLIN written speech sample ratings, by O. 
Bärenfänger</p> -<p> </p> -<h2><a name="annotations"></a>Annotations: Annotation scheme and annotation process</h2> -<p><strong><a href="docs/Annotation guidelines.pdf" class="reference">Annotation manual</a></strong> <img src="img/document-pdf.png" alt="pdf" width="16" height="16"></p> -<p><a href="docs/MERLIN-annotation-scheme.pdf" target="_blank" class="reference">MERLIN annotation scheme</a> <img src="img/document-pdf.png" alt="pdf" width="16" height="16"></p> -<p><a href="docs/MERLIN_FAQ_final.pdf" class="reference">Documentation of additional annotation issues</a> <img src="img/document-pdf.png" alt="pdf" width="16" height="16"></p> -<p> </p> -<h2><a name="pub"></a>MERLIN conference presentations </h2> -<p>Andrea Abel & Katrin Wisniewski. MER<em>LIN - die mehrsprachige Plattform für die europäischen Referenzniveaus </em>at the 6th ÖGSD Conference in Salzburg, November 2015 (accepted).</p> -<p>Katrin Wisniewski. <em>Empirisch gestützte Arbeit mit dem GeRS: Zur Einschätzung schriftlicher Leistungen in Deutsch, Tschechisch und Italienisch als Fremdsprachen mit dem Lernerkorpus MERLIN</em>. 26. Kongress der deutschen Gesellschaft für Fremdsprachenforschung in Ludwigsburg, September/October 2015 (accepted).</p> -<p>Katrin Wisniewski. <em>Empirical correlates of CEFR vocabulary and coherence level descriptions in learner language: A non-circular corpus-based validation approach</em>. Third Learner Corpus Research Conference, Nijmegen, September 2015 (accepted).</p> -<p>Katrin Wisniewski, Andrea Abel & Verena Lyding. <em>The MERLIN platform: exploring CEFR-related learner texts. Software demo at the </em>Third Learner Corpus Research Conference, Nijmegen, September 2015 (accepted).</p> -<p>Katrin Wisniewski. <em>Empirical validity evidence for the Common European Framework of Reference scales. </em>12th Conference of the European Association of Language Testing and Assessment, Kopenhagen, May 2015 (accepted).</p> -<p>Å tindlová, B./ÄŒurdova V. 
(2015): <a href="docs/9.6-9.8_Research Paper_Stindlova_Curdova_2015.pdf" target="_blank">MERLIN: Multilingválnà platforma pro evropské referenÄnà úrovnÄ›</a> <img src="img/document-pdf.png" alt="pdf" width="16" height="16">. In: <em>ÄŒasopis pro modernà filologii, dvojÄÃslo u pÅ™Ãležitosti 20. výroÄà ČNK</em>. <a href="http://cmf.ff.cuni.cz/?q=en(accepted)" target="_blank" class="reference">http://cmf.ff.cuni.cz/?q=en</a> (accepted). </p> -<p>Abel, Andrea: „Der Gemeinsame europäische Referenzrahmen für Sprachen und MERLIN: eine Ressource für Lehrende und Lernende“, - 11. bundesweites Seminar „Interkulturalität und Mehrsprachigkeit in der schulischen Praxis“, vom 20.-21.03.2015, in Graz (AT), <a href="docs/ikm-programm_11.pdf">Programme</a> <em><img src="img/document-pdf.png" alt="pdf" width="16" height="16"></em></p> -<p>Katrin Wisniewski. <em>MERLIN: Eine Plattform zur Veranschaulichung der Niveaustufen des Gemeinsamen europäischen Referenzrahmens für Sprachen</em>. Conference organized by the MSZ of the Technical University of Dresden: Online-Lernszenarien für Sprachlehre, Weiterbildung und Studium, October 2014. (<em><a href="docs/Conference-presentation-MSZ-Dresden.pptx" target="_blank">slides </a></em><em><a href="docs/CEFR_WebConference_Wisniewski_Abel_MERLIN.pdf" target="_blank"></a></em> <img src="img/document-pdf.png" alt="pdf" width="16" height="16">)</p> - -<p>Katrin Wisniewski. <em>Der Beitrag von Lernerkorpora zur Konstruktion und Validierung von Bewertungsskalen</em>. GAL-Kongress, Marburg, September 16-19, 2014. (<em><a href="docs/Conference-presentation_GAL 2014_Wisniewski.pdf" target="_blank">slides </a></em><em><a href="docs/CEFR_WebConference_Wisniewski_Abel_MERLIN.pdf" target="_blank"></a></em> <img src="img/document-pdf.png" alt="pdf" width="16" height="16">)</p> -<p>Barbora Å tindlová & Veronika ÄŒurdová. <em>MERLIN Multilingválnà platforma pro evropské referenÄnà úrovnÄ›. </em>Korpusová lingvistika, Prague, September 2014. 
(<em><a href="docs/Conference-presentation_Korpusova lingvistika_2014.pdf" target="_blank">slides </a></em><em><a href="docs/CEFR_WebConference_Wisniewski_Abel_MERLIN.pdf" target="_blank"></a></em> <img src="img/document-pdf.png" alt="pdf" width="16" height="16">)</p> - -<p>Adriane Boyd, Jirka Hana, Lionel Nicolas, Detmar Meurers, Katrin Wisniewksi, Andrea Abel, Karin Schöne, Barbora Å tindlová & Chiara Vettori: The MERLIN Corpus. Learner language and the CEFR. Poster presentation at the <em>Language Resources and Evaluation Conference in Reiykjavik, </em>26. Mai-1.Juni 2014. European Language Resources Association (ELRA). (<em><a href="docs/Conference-poster_LREC_2014_Nicolas-et-al.pdf" target="_blank">poster</a> <img src="img/document-pdf.png" alt="pdf" width="16" height="16">)</em></p> - -<p>Katrin Wisniewski. <em>MERLIN - a multifunctional trilingual learner corpus related to the CEFR</em>. 11th EALTA Conference in Warwick, UK, May 29 - June 1, 2014. (<em><a href="docs/Conference-presentation_EALTA 2014_Wisniewski.pdf" target="_blank">slides </a></em><em><a href="docs/CEFR_WebConference_Wisniewski_Abel_MERLIN.pdf" target="_blank"></a></em> <img src="img/document-pdf.png" alt="pdf" width="16" height="16">)</p> -<p>Barbora Å tindlová, Veronika ÄŒurdová, Petra KlimeÅ¡ová, Eva Levorová: <em>ŽÃKOVSKà KORPUS MERLIN: JAZYKOVÉ ÚROVNÄš A TROJJAZYÄŒNà CHYBOVà ANOTACE</em>, June 2014. (<em><a href="docs/Conference-presentation_ILPS_June 2014_Korpus-MERLIN.pdf" target="_blank">slides </a></em><em><a href="docs/CEFR_WebConference_Wisniewski_Abel_MERLIN.pdf" target="_blank"></a></em> <img src="img/document-pdf.png" alt="pdf" width="16" height="16">)</p> -<p> Barbora Å tindlová & Veronika ÄŒurdová. <em>MERLIN: Illustrating European Reference Levels in Three Languages</em>. Conference on Grammar and Corpora, Warsaw, June 2014. 
(<em><a href="docs/Conference-presentation_Corpora and grammar_Stindlova_Curdova.pdf" target="_blank">slides </a></em><em><a href="docs/CEFR_WebConference_Wisniewski_Abel_MERLIN.pdf" target="_blank"></a></em> <img src="img/document-pdf.png" alt="pdf" width="16" height="16">)</p> - -<p>Katrin Wisniewski, Andrea Abel. <em>The CEFR level descriptions and empirical learner data: MERLIN, a multi-lingual corpus initiative</em>. <a href="http://livestre.am/4OBIw" target="_blank">CEFR Web Conference</a>, March 2014. (<em><a href="docs/CEFR_WebConference_Wisniewski_Abel_MERLIN.pdf" target="_blank">slides </a></em><em><a href="docs/CEFR_WebConference_Wisniewski_Abel_MERLIN.pdf" target="_blank"></a></em> <img src="img/document-pdf.png" alt="pdf" width="16" height="16">)</p> -<p>Katrin Wisniewski. <em>Giving a Voice to the Learner. Using the Multilingual MERLIN Learner Corpus Related to the Common European Framework of Reference for Scale Validation</em>.<em> <a href="http://www.engl.polyu.edu.hk/events/apclc2014/index.html" target="_blank">Second Asia Pacific Corpus Linguistics Conference (APCLC 2014)</a></em>, Hong Kong, March 7-9, 2014.</p> -<p>Lionel Nicolas: <em>MERLIN: An Online Trilingual Learner Corpus Empirically Grounding the European Reference Levels in Authentic Learner Data. </em>ICT for Language Learning Conference in Florence, 6th edition, November 2013. <em>(<a href="docs/Conference-poster_ICTFLL_poster_Nicolas.pdf">poster</a> <img src="img/document-pdf.png" alt="pdf" width="16" height="16">)</em></p> - -<p>Julia Hancke and Detmar Meurers. <em><a href="http://www.sfs.uni-tuebingen.de/~dm/papers/Hancke.Meurers-13.pdf" target="_blank">Exploring CEFR classification for German based on rich linguistic modeling</a> <img src="img/document-pdf.png" alt="pdf" width="16" height="16">.</em> Learner Corpus Research 2013. Book of Abstracts, pp. 54-56. Bergen, Norway, September 27-29, 2013. 
(<a href="http://www.sfs.uni-tuebingen.de/~dm/papers/Hancke.Meurers-13-slides.pdf" target="_blank"><em>slides of the talk</em></a> <em><img src="img/document-pdf.png" alt="pdf" width="16" height="16"></em>)</p> -<p>Andrea Abel, Lionel Nicolas, Jirka Hana, Barbora Štindlová, Serhiy Bykh and Detmar Meurers. <em><a href="docs/PPT_Abel_MERLIN_LCR_2013_final.pdf" target="_blank">A Trilingual Learner Corpus illustrating European Reference Levels</a></em> <img src="img/document-pdf.png" alt="pdf" width="16" height="16">. Learner Corpus Research Conference 2013, Book of Abstracts, Bergen, Norway, September 27-29, 2013. </p> -<p>Andrea Abel. <em>Lernertexte zuverlässig bewerten: Die mehrsprachige Plattform für die Europäischen Referenzniveaus MERLIN</em>. <a href="http://www.idt-2013.it" target="_blank">IDT 2013</a>, Bolzano, Italy, July 29 - August 3, 2013. (<a href="docs/Abel_MERLIN_IDT_2013_v1.pdf" target="_blank"><em>slides</em></a> <img src="img/document-pdf.png" alt="pdf">)</p> -<p>Katrin Wisniewski. Poster: <em>Illustrating and Researching the Common European Framework Levels with a Multilingual Online Platform</em>. 
<a href="http://www.ltrc2013.or.kr/" target="_blank">The 35th Language Testing Research Colloquium</a>, Seoul, Korea, July 3-5, 2013.</p> -<p> </p> -<h2>MERLIN publications </h2> +<h2><a name="corpus"></a> Test tasks and data preparation</h2> +<p><strong>Complete test tasks </strong>including a task description are available for download in the section<strong> </strong><a href="C_mcorpus.php#tasks" target="_blank">MERLIN corpus</a>.</p> +<p><strong>MERLIN rating grid</strong>: <img src="img/document-pdf.png" alt="pdf" width="16" height="16"> <a href="docs/MERLIN_Rating-Grid_CZE.pdf" target="_blank">Czech</a> | <img src="img/document-pdf.png" alt="pdf" width="16" height="16"> <a href="docs/MERLIN_Rating-Grid_DE.pdf" target="_blank">German</a> | <img src="img/document-pdf.png" alt="pdf" width="16" height="16"> <a href="docs/MERLIN_Rating-Grid_ITA.pdf" target="_blank">Italian</a></p> +<p><img src="img/document-pdf.png" alt="pdf" width="16" height="16"> <a href="docs/MERLIN_Technical-report.pdf" target="_blank">Technical report</a>: Report on the reliability and scale functionality of the MERLIN written speech sample ratings, by O. Bärenfänger</p> +<h2><a name="annotations"></a>Annotation scheme and annotation process</h2> +<p><img src="img/document-pdf.png" alt="pdf" width="16" height="16"> <a href="docs/Annotation%20guidelines.pdf" target="_blank">Annotation manual (annotation structure and guidelines)</a></p> +<p><img src="img/document-pdf.png" alt="pdf" width="16" height="16"> <a href="docs/MERLIN-annotation-scheme.pdf" target="_blank">MERLIN annotation scheme </a></p> +<h2><a name="pub"></a>Publications </h2> +<p dir="ltr">Wisniewski, Katrin (2020): SLA developmental stages in the CEFR-related learner corpus MERLIN: Inversion and verb-end structures in German A2 and B1 learner texts. International Journal of Learner Corpus Research, 6(1), 1-17.</p> +<p dir="ltr">Weber, Tassja (2018): Grammatik und Lernerkorpora: Eine korpusorientierte Untersuchung von Präpositionalphrasen im deutschen MERLIN-Korpus. In: Fuß, E. et al. 
(eds.). Grammar and Corpora 2016. Heidelberg: Heidelberg University Publishing, 415-424.</p> +<p dir="ltr">Rysová Kateřina, Rysová Magdaléna, Novák Michal, Mírovský Jiří, Hajičová Eva (2019): <img src="img/document-pdf.png" alt="pdf" width="16" height="16" /> <a href="https://ufal.mff.cuni.cz/pbml/113/art-rysova-et-al.pdf" target="_blank">EVALD – a Pioneer Application for Automated Essay Scoring in Czech</a>. The Prague Bulletin of Mathematical Linguistics 113, 9–30. </p> +<p dir="ltr">Novák, Michal; Mírovský, Jiří; Rysová, Kateřina; Rysová, Magdaléna (2019): Exploiting Large Unlabeled Data in Automatic Evaluation of Coherence in Czech. Lecture Notes in Computer Science, Vol. 11697, Proceedings of the 22nd International Conference on Text, Speech and Dialogue – TSD 2019. Springer International Publishing, 197–210.</p> +<p dir="ltr">Rysová, Magdaléna; Rysová, Kateřina; Mírovský, Jiří; Novák, Michal (2019): <a href="https://library.iated.org/view/RYSOVA2019COH" target="_blank">Coherence Errors in Learners’ Essays and a Possibility of Their Improvement through EVALD</a> (Automated Evaluator of Discourse). Proceedings of the 11th Annual International Conference on Education and New Learning Technologies (EDULEARN 2019). Palma, Spain: IATED Academy, 2019, pp. 6761–6768. </p> +<p dir="ltr">Boyd, Adriane (2018): <img src="img/document-pdf.png" alt="pdf" width="16" height="16" /> <a href="https://www.aclweb.org/anthology/W18-6111.pdf" target="_blank">Using Wikipedia edits in low resource grammatical error correction</a>. Proceedings of the 2018 EMNLP Workshop W-NUT: The 4th Workshop on Noisy User-generated Text. Brussels, 79–84.</p> +<p dir="ltr">Novák, Michal; Mírovský, Jiří; Rysová, Kateřina; Rysová, Magdaléna (2018): Topic–Focus Articulation: A Third Pillar of Automatic Evaluation of Text Coherence. 
Advances in Computational Intelligence (LNAI 11289): 17th Mexican International Conference on Artificial Intelligence, MICAI 2018, Proceedings, Part II, Switzerland: Springer, 96–108.</p> +<p dir="ltr">Rysová, Magdaléna; Rysová, Kateřina; Mírovský, Jiří; Novák, Michal (2018): <a href="https://library.iated.org/view/RYSOVA2018PRA" target="_blank">Practicing Students’ Writing Skills through eLearning</a>: Automated Evaluation of Text Coherence in Czech. EDULEARN18 Proceedings. Valencia, Spain: IATED Academy, 2018, pp. 1963–1970.</p> +<p dir="ltr">Rysová, Kateřina, Rysová, Magdaléna (2018): <img src="img/document-pdf.png" alt="pdf" width="16" height="16" /> <a href="https://ufal.mff.cuni.cz/~rysova/2019/docs/anaphoric_connectives_EDULEARN.pdf" target="_blank">The Correlation between Discourse-Anaphoric Devices and an Overall Communicative Competence in Learners’ Essays</a>. In EDULEARN18 Proceedings. Valencia, Spain: IATED Academy, 2144–2154.</p> +<p dir="ltr">Novák, Michal; Rysová, Kateřina; Rysová, Magdaléna; Mírovský, Jiří (2017): Incorporating Coreference to Automatic Evaluation of Coherence in Essays. Statistical Language and Speech Processing. Springer International Publishing, 58–69.</p> +<p dir="ltr">Rysová, Kateřina; Rysová, Magdaléna; Mírovský, Jiří; Novák, Michal (2017): <img src="img/document-pdf.png" alt="pdf" width="16" height="16" /> <a href="https://acl-bg.org/proceedings/2017/RANLP%202017/pdf/RANLP082.pdf" target="_blank">Introducing EVALD – Software Applications for Automatic Evaluation of Discourse in Czech</a>. Proceedings of the International Conference Recent Advances in Natural Language Processing, Šumen, Bulgaria: INCOMA Ltd., 634–641. </p> +<p dir="ltr">Rysová, Kateřina; Rysová, Magdaléna; Mírovský, Jiří (2016): <img src="img/document-pdf.png" alt="pdf" width="16" height="16" /> <a href="https://www.aclweb.org/anthology/O16-1021.pdf" target="_blank">Automatic evaluation of surface coherence in L2 texts in Czech</a>. 
In Proceedings of the 28th Conference on Computational Linguistics and Speech Processing ROCLING XXVIII (2016). Taipei, Taiwan: The Association for Computational Linguistics and Chinese Language Processing (ACLCLP), 214–228.</p> +<p dir="ltr">Lyding, Verena, Schöne, Karin (2016): <a href="https://www.aclweb.org/anthology/L16-1392" target="_blank">Design and Development of the MERLIN Learner Corpus Platform</a>. Proceedings of the Tenth International Conference on Language Resources and Evaluation (LREC'16), 2471 – 2477.</p> +<p dir="ltr">Štindlová, B., Čurdova V. (2015): <img src="img/document-pdf.png" alt="pdf" width="16" height="16" /> <a href="https://merlin-platform.eu/docs/9.6-9.8_Research%20Paper_Stindlova_Curdova_2015.pdf" target="_blank">MERLIN: Multilingvální platforma pro evropské referenční úrovně</a>. Časopis pro moderní filologii, dvojčíslo u příležitosti 20. výročí ČNK. 2, 190-200.</p> <p>Abel, Andrea; Wisniewski, Katrin; Nicolas, Lionel; Boyd, Adriane; -Hana, Jirka; Meurers, Detmar (2014): <a href="http://www.ojs.unito.it/index.php/ricognizioni/article/view/702/677" target="_blank"><em>A Trilingual Learner Corpus illustrating European Reference Levels</em></a>. In: <em>Ricognizioni – Rivista di Lingue, Letterature e Culture Moderne 2 (1)</em>, 111-126. </p> + Hana, Jirka; Meurers, Detmar (2014): <a href="http://www.ojs.unito.it/index.php/ricognizioni/article/view/702/677" target="_blank">A Trilingual Learner Corpus illustrating European Reference Levels</a>. In: <em>Ricognizioni – Rivista di Lingue, Letterature e Culture Moderne 2 (1)</em>, 111-126. </p> <p>Katrin Wisniewski. <em>Die Validität der Skalen des Gemeinsamen europäischen Referenzrahmens für Sprachen.Eine empirische Untersuchung der Flüssigkeits- und Wortschatzskalen des GeRS am Beispiel des Italienischen und des Deutsche</em>n. 
Language Testing and Evaluation vol.33, Frankfurt am Main 2014</p> -<p>Adriane Boyd, Jirka Hana, Lionel Nicolas, Detmar Meurers, Katrin Wisniewski, Andrea Abel, Karin Schöne, Barbora Štindlová and Chiara Vettori. <em><a href="docs/LREC14_Paper.pdf" target="_blank">The MERLIN corpus: Learner Language and the CEFR </a><img src="img/document-pdf.png" alt="pdf" width="16" height="16"></em>. Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14), European Language Resources Association (ELRA), Reykjavik, May 26-31, 2014.</p> -<p>Katrin Wisniewski, Karin Schöne, Lionel Nicolas, Chiara Vettori, Adriane Boyd, Detmar Meurers, Andrea Abel, Jirka Hana. <em>MERLIN: An online trilingual learner corpus empirically grounding the European Reference Levels in authentic learner data</em>. <a href="docs/322-CEF03-FP-Wisniewski-ICT2013.pdf" target="_blank">ICT for Language Learning 2013, Conference Proceedings</a> <img src="img/document-pdf.png" alt="pdf" width="10" height="15"> Libreriauniversitaria.it. Edizioni, Florence, Italy, November 14-15, 2013.</p> -<p> </p> -<h3> </h3> -<h3><strong><a name="pub-mt"></a>The MERLIN corpus has been used in the following master's theses:</strong></h3> -<p>Tina Schönfelder. <em><a href="docs/MA_thesis_Schoenfelder.pdf" target="_blank">REQUESTS im Italienischen und Deutschen als Fremdsprache</a></em> <em><img src="img/document-pdf.png" alt="" width="16" height="16"></em>. Master's thesis, TU Dresden, February 2014</p> -<p>Tassja Weber. <em><a href="docs/Masterarbeit_Tassja_Weber.pdf" target="_blank">Verbvalenz und Rektion im Bereich Deutsch als Fremdsprache. Eine korpusgestützte Analyse zweier Verbgruppen</a> <img src="img/document-pdf.png" alt="" width="16" height="16"></em>. Master's thesis, TU Dortmund. 2013</p> -<p>Julia Hancke. 
<em><a href="docs/MA-Thesis-Julia-Hancke.pdf" target="_blank">Automatic Prediction of CEFR Proficiency Levels Based on Linguistic Features of Learner Language</a></em> <em><img src="img/document-pdf.png" alt="" width="16" height="16"></em>. Master's thesis, University of Tübingen, April 2013</p> -<p> </p> -<h2><a name="usermod"></a>User modelling and user interface specification</h2> -<p class="reference"><a href="docs/WP4_UserModelling_Part1.pdf" target="_blank" class="reference">Report on user study - part 1</a> <em><img src="img/document-pdf.png" alt="" width="16" height="16"></em></p> -<p><a href="docs/WP4_UserModelling_Part2Technical_report.pdf" target="_blank" class="reference">Report on user study - part 2</a><strong> / technical part</strong> <em><img src="img/document-pdf.png" alt="" width="16" height="16"></em></p> -<p>The user study conducted at the beginning of the project aimed at:</p> -<ul> - <li> - <p>getting to know users' needs concerning the features of learner language that are considered most relevant,</p> - </li> - <li> - <p> getting to know users' needs concerning the illustration of CEFR levels, </p> - </li> - <li> - <p>collecting data as a basis for the development of experiential, user-based indicators describing L2-competence, </p> - </li> - <li> - <p> guaranteeing the adequacy of the tool on both a content and a technical level.</p> - </li> -</ul> -<p> </p> -<p><a href="docs/MERLIN_del-7.1.pdf"><strong>Written specification of interface capabilities</strong></a> <em><img src="img/document-pdf.png" alt="" width="16" height="16"></em></p> -<p><a href="docs/MERLIN_del-7.2.pdf"><strong>Draft of the platform structure</strong></a> (macrostructure) <em><img src="img/document-pdf.png" alt="" width="16" height="16"></em></p> -<p><a href="docs/MERLIN_del-7.3.pdf"><strong>Design specification of the user interface</strong></a> (microstructure) <em><img src="img/document-pdf.png" alt="" width="16" height="16"></em></p> +<p>Adriane Boyd, Jirka Hana, 
Lionel Nicolas, Detmar Meurers, Katrin Wisniewski, Andrea Abel, Karin Schöne, Barbora Štindlová and Chiara Vettori (2014): <img src="img/document-pdf.png" alt="pdf" width="16" height="16" /> <a href="docs/LREC14_Paper.pdf" target="_blank">The MERLIN corpus: Learner Language and the CEFR </a>. Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14), European Language Resources Association (ELRA), Reykjavik, May 26-31, 2014.</p> +<p>Katrin Wisniewski, Karin Schöne, Lionel Nicolas, Chiara Vettori, Adriane Boyd, Detmar Meurers, Andrea Abel, Jirka Hana. <em>MERLIN: An online trilingual learner corpus empirically grounding the European Reference Levels in authentic learner data</em>. <img src="img/document-pdf.png" alt="pdf" width="16" height="16" /> <a href="docs/322-CEF03-FP-Wisniewski-ICT2013.pdf" target="_blank">ICT for Language Learning 2013, Conference Proceedings</a> Libreriauniversitaria.it. Edizioni, Florence, Italy, November 14-15, 2013.</p> +<h3><strong><a name="pub-mt"></a>The MERLIN corpus has been used in the following master's and PhD theses:</strong></h3> +<p dir="ltr">Zarah Leonie Weiß: Using Measures of Linguistic Complexity to Assess German L2. Proficiency in Learner Corpora under Consideration of Task-Effects, Tübingen 2017</p> +<p>Weber, Tassja: Präpositionen und Deutsch als Fremdsprache: Quantitative Fallstudien im Lernerkorpus MERLIN. Promotion, Mannheim 2020</p> +<p>Tina Schönfelder. <img src="img/document-pdf.png" alt="" width="16" height="16"> <em><a href="docs/MA_thesis_Schoenfelder.pdf" target="_blank">REQUESTS im Italienischen und Deutschen als Fremdsprache</a></em>. Masterarbeit, TU Dresden, Februar 2014</p> +<p>Tassja Weber. <img src="img/document-pdf.png" alt="" width="16" height="16"> <em><a href="docs/Masterarbeit_Tassja_Weber.pdf" target="_blank">Verbvalenz und Rektion im Bereich Deutsch als Fremdsprache. Eine korpusgestützte Analyse zweier Verbgruppen</a></em>. Masterarbeit, TU Dortmund. 
2013</p> +<p>Julia Hancke. <img src="img/document-pdf.png" alt="" width="16" height="16"> <a href="docs/MA-Thesis-Julia-Hancke.pdf" target="_blank"><em>Automatic Prediction of CEFR Proficiency Levels Based on Linguistic Features of Learner Language</em></a>. Masterarbeit, Universität Tübingen, April 2013</p> +<p> </p> +<h2><a name="archiv"></a>Archive</h2> +<p><img src="img/document-pdf.png" alt="" width="16" height="16"> <a href="docs/MERLIN_user-manual-GER.pdf" target="_blank" class="reference">MERLIN user manual</a> | Background information on the goals and methods of the project, documentation of the initial search functionalities of the website</p> +<p><img src="img/document-pdf.png" alt="" width="16" height="16"> <a href="docs/WP4_UserModelling_Part1.pdf" target="_blank" class="reference">Report on user study - part 1</a> | Users' needs concerning the illustration of CEFR levels and the features of learner language that are considered most relevant</p> +<p><img src="img/document-pdf.png" alt="" width="16" height="16"> <a href="docs/WP4_UserModelling_Part2Technical_report.pdf" target="_blank" class="reference">Report on user study - part 2</a> | Technical client-side requirements, required search and interface functionalities, usage of data</p> <p> </p> <!--INSERT END--> </div> diff --git a/en/help-annis-glossary.php b/en/help-annis-glossary.php index f8c8ca6..9fc1c83 100644 --- a/en/help-annis-glossary.php +++ b/en/help-annis-glossary.php @@ -419,7 +419,7 @@ require('F_mainsidebar.php'); <li><em>& meta::_author_L1="German" </em>[English, Russian, Arabic, etc.]</li> </ul> <p><img src="img/hint_bulb.png" alt="hint bulb" /><span class="StilSmall"> The <a href="https://korpling.github.io/ANNIS/4.5/user-guide/interface/index.html" target="_blank" class="reference">ANNIS User Guide</a> offers a thorough introduction to using the ANNIS interface.</span></p> - <h2 dir="ltr"><a name="freqanalysis" id="freqanalysis"></a>4 Retrieve statistical informationen </h2> + <h2 
dir="ltr"><a name="freqanalysis" id="freqanalysis"></a>4 Retrieve statistical information </h2> <p dir="ltr">To get an indication of the frequency of certain L2 features use the ANNIS search. </p> <ol> diff --git a/en/mcorpus.php b/en/mcorpus.php index a590c2f..614adb7 100644 --- a/en/mcorpus.php +++ b/en/mcorpus.php @@ -173,7 +173,7 @@ require('F_mainsidebar.php'); </ul> <p>For a comprehensive overview of the texts and the metadata associated with them, you can refer to the table <img src="img/icon_txt.png" alt="txt" width="13" height="16" /> <a href="https://gitlab.inf.unibz.it/commul/merlin-platform/merlin-metadata/tags/v1.1" target="_blank">metadata_ratings_indicators.cvs</a>. It also covers, for each corpus text, numerous indicators targeting L2 features, as well as lexical, morphological, and syntactic complexity measures (for the German corpus).</p> -<h2><a name="figures"></a>Das MERLIN-Korpus in Zahlen </h2> +<h2><a name="figures"></a>The MERLIN corpus in figures </h2> <p>The following charts show the total number of texts at a given CEFR level and the amount of the annotations. The overviews also allow for a comparison of test level and actually rated level.</p> <h3> Number of texts per CEFR level</h3> <p><img src="img/corp-stat-gesamt.png" alt="corp stat all" width="534" height="255" /></p> diff --git a/en/research.php b/en/research.php index a7e5fdf..b1a3b60 100644 --- a/en/research.php +++ b/en/research.php @@ -10,178 +10,121 @@ require('F_mainsidebar.php'); <h1>MERLIN for research</h1> <h2>1. 
Linking the MERLIN texts to the CEFR</h2> <div id="anchor11"></div> -<h3><a name="reratings"></a>1.1 Re-ratings</h3> - <a href="#anchor11" onClick="toggle('#content11','#img11')"><img id="img11" src="img/toggle-expand.png"></a> +<h3><a name="reratings"></a><a href="#anchor11" onClick="toggle('#content11','#img11')"><img id="img11" src="img/toggle-expand.png"></a> 1.1 Re-ratings</h3> <div id="content11" class="content"> -<p>The MERLIN texts are the writings sections of CEFR-related, standardized high-quality tests from telc (Frankfurt/Main, Italian and German tests, <a href="http://www.telc.net/" target="_blank" class="reference">homepage</a>) and ÚJOP (Prague, Czech tests, <a href="http://ujop.cuni.cz/" target="_blank" class="reference">homepage</a>). These institutions are ALTE-audited (<a href="http://www.alte.org" target="_blank" class="reference">ALTE-homepage</a>). The <a href="C_mcorpus.php#anchor3" target="_blank" class="reference">tasks</a> were in use until 2013 and are now freely available on the platform. However, to have explicit and direct information about the CEFR profiles of the written productions themselves (and not only of the tests as a whole), for MERLIN all texts were re-rated independently by two professional raters per language. -The reliability of the re-ratings was examined with the help of Classical Test Theory and a Multi-Facet Rasch analysis. The latter is a probabilistic statistical procedure often used in language testing which allows for a correction of rating tendencies (e.g., leniency/harshness) and makes it possible to arrive at a fair average rating for each text. The intra-rater and inter-rater reliability was generally very high in MERLIN, with some exceptions for Italian. Therefore, the whole re-rating process was repeated for Italian resulting in a satisfying rating quality. -In MERLIN, the fair average is calculated based on a holistic scale (see <a href="#instruments" class="reference">1.2 rating instruments</a>). 
If you compile your own corpus based on CEFR levels, these are also based on the fair average ratings (<a href="#" onclick="document.forms['documents'].submit();" class="reference">Define a subcorpus</a> » Overall CEFR rating</strong></em>). -If you are interested in more details regarding the quality of the ratings and the difficulty of the single rating criteria, please consult the <a href="C_download.php#corpus" target="_blank" class="reference">technical report</a>. </p> - -<p> </p> +<p>The MERLIN texts are the writing sections of CEFR-related, standardized high-quality tests from <a href="http://www.telc.net/" target="_blank" class="reference">telc</a> (Frankfurt/Main, Italian and German tests) and <a href="http://ujop.cuni.cz/" target="_blank" class="reference">ÚJOP</a> (Prague, Czech tests). These institutions are ALTE-audited (<a href="http://www.alte.org" target="_blank" class="reference">ALTE</a>). The <a href="C_mcorpus.php#anchor3" target="_blank">tasks</a> were in use until 2013 and are now freely available on the platform. However, to have explicit and direct information about the CEFR profiles of the written productions themselves (and not only of the tests as a whole), for MERLIN all texts were re-rated independently by two professional raters per language. +The reliability of the re-ratings was examined with the help of Classical Test Theory and a Multi-Facet Rasch analysis. The latter is a probabilistic statistical procedure often used in language testing which allows for a correction of rating tendencies (e.g., leniency/harshness) and makes it possible to arrive at a fair average rating for each text. The intra-rater and inter-rater reliability was generally very high in MERLIN, with some exceptions for Italian. 
Therefore, the whole re-rating process was repeated for Italian resulting in a satisfying rating quality.</p> +<p>In MERLIN, the fair average is calculated based on a holistic scale (see <a href="#instruments">1.2 rating instruments</a>).<br /> + If you are interested in more details regarding the quality of the ratings and the difficulty of the single rating criteria, please consult the <a href="C_download.php#corpus" target="_blank">technical report</a>. </p> </div> <div id="anchor12"></div> -<h3><a name="instruments"></a>1.2 Rating instruments </h3> - <a href="#anchor12" onClick="toggle('#content12','#img12')"><img id="img12" src="img/toggle-expand.png"></a> +<h3><a name="instruments"></a><a href="#anchor12" onClick="toggle('#content12','#img12')"><img id="img12" src="img/toggle-expand.png"></a> 1.2 Rating instruments </h3> <div id="content12" class="content"> -<p>Two rating instruments were used: An assessor-oriented version (Alderson 1991) of the holistic scale (page 2 of the <a href="C_download.php#corpus" target="_blank" class="reference">MERLIN rating grid</a>) for "General Linguistic Range" (Chapter 5, CEFR) was accompanied by an analytical rating grid (page 3 of the <a href="C_download.php#corpus" target="_blank" class="reference">MERLIN rating grid</a>) that is closely connected to Table 3 of the CEFR (CoE 2001). This table was of great importance in the process of scaling the CEFR descriptors (North 2005, 2000). The MERLIN version includes six rating criteria (vocabulary range | vocabulary control | grammatical accuracy | coherence & cohesion|orthography | sociolinguistic appropriateness). These criteria stem from scales in Chapter 5 of the CEFR that specifies aspects of communicative L2 competence. For the construction of the grid, descriptors of these scales were modified in an assessor-oriented way. Plus-levels (A2+, B1+) were excluded as the CEFR does not specify descriptors for these levels for all rating criteria. 
The rating instruments were piloted before their implementation in the MERLIN project.</p> +<p>Two rating instruments were used: An assessor-oriented version (Alderson 1991) of the holistic scale (page 2 of the <a href="C_download.php#corpus" target="_blank">MERLIN rating grid</a>) for "General Linguistic Range" (Chapter 5, CEFR) was accompanied by an analytical rating grid (page 3 of the <a href="C_download.php#corpus" target="_blank">MERLIN rating grid</a>) that is closely connected to Table 3 of the CEFR (CoE 2001). This table was of great importance in the process of scaling the CEFR descriptors (North 2005, 2000). The MERLIN version includes six rating criteria (vocabulary range | vocabulary control | grammatical accuracy | coherence & cohesion|orthography | sociolinguistic appropriateness). These criteria stem from scales in Chapter 5 of the CEFR that specifies aspects of communicative L2 competence. For the construction of the grid, descriptors of these scales were modified in an assessor-oriented way. Plus-levels (A2+, B1+) were excluded as the CEFR does not specify descriptors for these levels for all rating criteria. The rating instruments were piloted before their implementation in the MERLIN project.</p> </div> -<p> </p> - -<h2><a name="dataprep"></a>2. Preparing the data</h2> +<h2>2. Preparing the data</h2> <div id="anchor21"></div> - <h3>2.1 Transcriptions</h3> - <a href="#anchor21" onClick="toggle('#content21','#img21')"><img id="img21" src="img/toggle-expand.png"></a> + <h3><a href="#anchor21" onClick="toggle('#content21','#img21')"><img id="img21" src="img/toggle-expand.png"></a> 2.1 Transcriptions</h3> <div id="content21" class="content"> -<p>The hand-written original learner texts were transcribed in an xml-based editor (xml mind©) inside the testing institutions (telc and ÚJOP). 
The transcribers followed <a href="C_download.php#corpus" class="reference">transcription guidelines</a> (available only in German) and the reliability of the transcripts was checked, initially for a sample of 5% of the texts per CEFR level. As many transcription errors were detected, in the end almost all texts had to undergo a revision stage.<br> +<p>The hand-written original learner texts were transcribed in an xml-based editor (xml mind©) inside the testing institutions (telc and ÚJOP). The transcribers followed transcription guidelines and the reliability of the transcripts was checked, initially for a sample of 5% of the texts per CEFR level. As many transcription errors were detected, in the end almost all texts had to undergo a revision stage.<br> The transcription guidelines included tags (inline annotation) for basic textual features such as unreadable or ambiguous stretches of language, foreign language words, emoticons, images, paragraphs, copied words from the rubrics, or greeting formulae. The anonymization (names, places) was part of the transcription process and was carried through based on the guidelines.</p> <div> <div> </div> </div> </div> <div id="anchor22"></div> -<h3>2.2 Tools & formats</h3> -<a href="#anchor22" onClick="toggle('#content22','#img22')"><img id="img22" src="img/toggle-expand.png"></a> +<h3><a href="#anchor22" onClick="toggle('#content22','#img22')"><img id="img22" src="img/toggle-expand.png"></a> 2.2 Tools & formats</h3> <div id="content22" class="content"> -<p>Once the transcriptions were available, all data was converted to PAULA (<a href="purl.org/net/paula" target="_blank" class="reference">purl.org/net/paula</a>), a standoff XML format designed as an exchange format for linguistic annotation. 
-Further manual annotations were carried through with two tools: MMAX2 (<a href="mmax2.net" target="_blank" class="reference">mmax2.net</a>) and the Falko Excel Add-in (<a href="purl.org/net/falko" target="_blank" class="reference">purl.org/net/falko</a>). MMAX2 is a text annotation tool that allows multi-layered annotation. It was used for the annotation of learner language features (see <a href="#annotations" class="reference">2.3.1</a>). The Falko Add-in was used for annotating both target hypothesis 1 and 2 (» <em> <strong>for more details on the annotation of target hypotheses with the Falko Add-in see</strong></em> <a href="http://www.linguistik.hu-berlin.de/institut/professuren/korpuslinguistik/forschung/falko/Falko-Handbuch_Korpusaufbau und Annotationen_v2.01" target="_blank" class="reference">Falko-Handbuch</a>). -Automatic annotation made use of the UIMA framework (<a href="uima.apache.org" target="_blank" class="reference">uima.apache.org</a>). UIMA allows a modular integration of a wide range of NLP tools such as part-of-speech taggers and parsers. For the advanced search functions, the open source web-browser based search and visualization architecture ANNIS (<a href="purl.org/net/annis" target="_blank" class="reference">purl.org/net/annis</a>) is used in the MERLIN interface (<a href="#" onclick="document.forms['help-annis'].submit();" target="_blank" class="reference">see explanations on search output in ANNIS</a>). 
</p> -<p> </p> +<p>Once the transcriptions were available, all data was converted to <a href="http://www.sfb632.uni-potsdam.de/paula.html" target="_blank" class="reference">PAULA</a>, a standoff XML format designed as an exchange format for linguistic annotation.</p> +<p>Further manual annotations were carried through with two tools: <a href="http://mmax2.net/" target="_blank" class="reference">MMAX2</a> and the <a href="https://www.linguistik.hu-berlin.de/de/institut/professuren/korpuslinguistik/forschung/falko/tools" target="_blank" class="reference">Falko Excel Add-in</a>. MMAX2 is a text annotation tool that allows multi-layered annotation. It was used for the annotation of learner language features (see <a href="#annotations">2.3.1</a>). The Falko Add-in was used for annotating both target hypothesis 1 and 2. </p> +<p>Automatic annotation made use of the <a href="https://uima.apache.org/" target="_blank" class="reference">UIMA framework</a>. UIMA allows a modular integration of a wide range of NLP tools such as part-of-speech taggers and parsers. For the advanced search functions, the open source web-browser based search and visualization architecture <a href="https://corpus-tools.org/annis/" target="_blank" class="reference">ANNIS</a> is used. </p> </div> <div id="anchor231"></div> -<p> </p> <h3><a name="annotations"></a>2.3 Annotations</h3> -<p>A short introduction to the structure of the MERLIN annotations is provided <a href="C_annotation.php#anchor1" target="_blank" class="reference">here</a>. 
Here, you find more detailed information on the single annotation layers that are available for the whole corpus, for the smaller core corpus, and you find indications on quality control aspects.</p> -<blockquote> - <h4>2.3.1 Manual annotations available for the whole corpus <a href="#anchor231" onClick="toggle('#content231','#img231')"><img id="img231" src="img/toggle-expand.png"></a></h4> -</blockquote> +<h4><a href="#anchor231" onClick="toggle('#content231','#img231')"><img id="img231" src="img/toggle-expand.png"></a> 2.3.1 Manual annotations available for the whole corpus</h4> <div id="content231" class="content"> <p><img src="img/annotations_GRAPHIC-layer_en1.png" width="534" height="195" alt="EA1"></p> - <p> </p> - <h5>Minimal target hypotheses / target hypotheses 1 (TH1)</h5> + <h5>Minimal target hypotheses / target hypotheses 1 (TH1)</h5> <p>All annotation is necessarily based on human interpretation of what the person who produced the text might have had on his/her mind. It is important to make this interpretation explicit so that MERLIN users can understand the annotations better. Therefore, the MERLIN corpus contains rule-based target hypotheses that suggest a corrected version of the learner texts. <br> - In the main phase of annotation, an orthographically and grammatically correct version of the learner text was created (target hypotheses 1, TH1) for the whole corpus. As little interventions as possible were allowed by the annotator. In this table, you find a simple example (for a definition of the tiers, please refer to the <a href="#" onclick="document.forms['help-annis'].submit();" target="_blank" class="reference">explanations of the search output</a>):</p> + In the main phase of annotation, an orthographically and grammatically correct version of the learner text was created (target hypotheses 1, TH1) for the whole corpus. As little interventions as possible were allowed by the annotator. 
In this table, you find a simple example:</p> <p><img src="img/TH1_example1.png"></p> <p>The following example by the same learner shows that in TH1, errors from other linguistic areas were ignored. There are content and technical reasons for this.</p> <p><img src="img/TH1_example2.png"></p> - <p>While the orthographical (capitalization error, word boundary error, missing hyphen) and grammatical (missing article) errors are corrected in the TH1 (termed ‘ZH1’ here), the lexically erroneous form *Reisespass (instead of “Reisepass”) was not substituted by another lexeme. Phenomena like this are annotated in the <a href="#corecorpus" target="_blank" class="reference">MERLIN core corpus</a> (for definitions of the errors see <a href="C_download.php#annotations" target="_blank" class="reference">MERLIN annotation scheme</a>).</p> - <p>The team followed the target hypotheses rules developed for the <a href="http://www.linguistik.hu-berlin.de/institut/professuren/korpuslinguistik/forschung/falko" target="_blank" class="reference">Falko corpus</a> and adapted them to the project needs where necessary (cf. Reznicek/Lüdeling et al. 2012; see <a href="C_download.php#annotations" target="_blank" class="reference">annotation structure guidelines and Documentation of annotation decisions</a>). In some cases, annotators agreed upon annotation rules on a very fine-grained level. For example, it was decided that in German, the final double <ss> instead of standard German spelling <ß> was not changed in texts in which it might be possible that the learner consistently used the Swiss spelling, which does not use the <ß>. 
For single decisions that you might be interested in, please consult <a href="C_download.php#annotations" target="_blank" class="reference">the Documentation of annotation decisions</a>.</p> + <p>While the orthographical (capitalization error, word boundary error, missing hyphen) and grammatical (missing article) errors are corrected in the TH1, the lexically erroneous form *Reisespass (instead of “Reisepass”) was not substituted by another lexeme. Phenomena like this are annotated in the <a href="#corecorpus" target="_blank">MERLIN core corpus</a> (for definitions of the errors see <a href="C_download.php#annotations" target="_blank">MERLIN annotation scheme</a>).</p> + <p>The team followed the target hypotheses rules developed for the <a href="http://www.linguistik.hu-berlin.de/institut/professuren/korpuslinguistik/forschung/falko" target="_blank" class="reference">Falko corpus</a> and adapted them to the project needs where necessary (cf. Reznicek/Lüdeling et al. 2012; see <a href="C_download.php#annotations" target="_blank">annotation manual</a>). In some cases, annotators agreed upon annotation rules on a very fine-grained level. For example, it was decided that in German, the final double <ss> instead of standard German spelling <ß> was not changed in texts in which it might be possible that the learner consistently used the Swiss spelling, which does not use the <ß>. Annotation decisions have been documented consistently and are available upon request.</p> <p>TH1 were compiled for the whole MERLIN corpus. The TH1 were written in Excel with the help of the Falko Add-in. 
The TH1 was piloted before the actual annotation took place.</p> - <p> If you want to display the TH1 on the MERLIN platform, go to <strong><em><a href="#" onclick="document.forms['advanced'].submit();" class="reference">Advanced search.</a></em></strong> To get explanations about the output you get there, read more <a href="#" onclick="document.forms['help-annis'].submit();" target="_blank" class="reference">here</a>. You can also display TH1 for whole texts in the search results of <em><strong><a href="#" onclick="document.forms['documents'].submit();" class="reference">Define a subcorpus</a></strong></em>.</p> - <p> </p> - <table border="0" cellspacing="1" cellpadding="0"> - <tr> - <td valign="top"><img src="img/aim-icon.png" width="30" height="30" alt="go"></td> - <td width="720" bgcolor="#CCCCCC"><p>Useful links & downloads with regard to TH1:<br> - <a href="C_download.php#annotations" target="_blank" class="reference">MERLIN annotation manual</a><br> - <a href="http://www.linguistik.hu-berlin.de/institut/professuren/korpuslinguistik/forschung/falko/Falko-Handbuch_Korpusaufbau%20und%20Annotationen_v2.01" target="_blank" class="reference">Das Falko-Handbuch. Korpusaufbau und Annotationen. Version 2.01. HU Berlin</a> (Falko guidelines)<br> - <a href="C_download.php#annotations" target="_blank" class="reference">Documentation of annotation decisions</a></p></td> - </tr> - </table> - <p> </p> <h5><strong><a name="ea1"></a></strong>Manual annotation of grammatical and orthographical learner language features – error annotation 1 (EA1)</h5> -<p>Building on the target hypotheses 1, all MERLIN texts were annotated with grammatical and orthographical language features from various sources (error annotation 1 – EA1). 
You can find a complete list of the features (“tags”) with examples <a href="C_annotation.php#anchor4" target="_blank" class="reference">here</a>, while the <a href="C_download.php#annotations" target="_blank" class="reference">annotation scheme </a> gives you full access to the definitions of each learner language feature and additional examples.</p> +<p>Building on the target hypotheses 1, almost all MERLIN texts (for details see <a href="C_mcorpus.php#figures" target="_blank">The MERLIN corpus in figures</a>) were annotated with grammatical and orthographical language features from various sources (EA1 = error annotation 1). You can find a complete list of the features (“tags”) with examples in <a href="C_annotation.php#featurelist" target="_blank">MERLIN annotations</a>, while the <a href="C_download.php#annotations" target="_blank">annotation scheme</a> gives you full access to the definitions of each learner language feature and additional examples.</p> <p>The MERLIN annotation tags for <strong>EA1 and EA2</strong> were derived from …</p> <ol> <li> - <p><strong>CEFR scales</strong>: some tags were chosen to support research about the empirical validity of the CEFR scales underlying the <a href="C_download.php#corpus" target="_blank">MERLIN analytical rating grid </a> (chapter 5 of the CEFR, CoE 2001). They can help to control whether the predictions of selected CEFR descriptors correspond to learner behaviour, e.g.: intelligibility, use of idioms, content jumps (<a href="#scale-valid" class="reference">see 3.2 MERLIN for scale validation</a>).  </p> + <strong>CEFR scales</strong>: some tags were chosen to support research about the empirical validity of the CEFR scales underlying the <a href="C_download.php#corpus" target="_blank">MERLIN analytical rating grid </a> (chapter 5 of the CEFR, CoE 2001). 
They can help to control whether the predictions of selected CEFR descriptors correspond to learner behaviour, e.g.: intelligibility, use of idioms, content jumps (<a href="#scale-valid">see 3.2 MERLIN for scale validation</a>). </li> <li> - <p>issues in current <strong>SLA research</strong>, e.g. grammatical aspects such as verb valency, word order, negation, or lexical aspects, e.g. the use of formulaic sequences (<a href="#bib" class="reference">references</a>)</p> + issues in current <strong>SLA research</strong>, e.g. grammatical aspects such as verb valency, word order, negation, or lexical aspects, e.g. the use of formulaic sequences (<a href="#bib">references</a>) </li> <li> - <p>features reported to the MERLIN team by <strong>testers, teachers and teacher trainers</strong> in a questionnaire study and in expert interviews as being relevant for assessing language mastery at certain levels, e.g. the verbal aspect in Italian and Czech </p> + features reported to the MERLIN team by <strong>testers, teachers and teacher trainers</strong> in a questionnaire study and in expert interviews as being relevant for assessing language mastery at certain levels, e.g. the verbal aspect in Italian and Czech </li> <li> - <p><strong>textbook and language test analyses </strong>revealed further recurrent topics some of which were included in the MERLIN annotation scheme, e.g. German modal verbs</p> + <strong>textbook and language test analyses </strong>revealed further recurrent topics some of which were included in the MERLIN annotation scheme, e.g. German modal verbs </li> <li> - <p><strong>learner text analyses</strong> carried out in a random sample of MERLIN texts (5% per test level/language), e.g. use of articles and clitics</p> + <strong>learner text analyses</strong> carried out in a random sample of MERLIN texts (5% per test level/language), e.g. 
use of articles and clitics </li> </ol> -<p> </p> -<p>The annotation scheme specifies to which group(s) the single learner language features belong.</p> -<p>Furthermore, most error-related MERLIN tags (EA1 & EA2) incorporate the widely used <strong>‘target language modification’</strong> dimension (cf. Díaz-Negrillo/Fernández-Domínguez 2006). This dimension specifies the type of error: an element might have been omitted, changed, added, repositioned, merged with, or split from another element). You can find details about this in the <a href="C_download.php#annotations" target="_blank" class="reference">annotation scheme</a>. </p> -<p>You can search for the annotated learner language features in the <strong><em><a href="#" onclick="document.forms['advanced'].submit();" class="reference">Advanced search</a>,</em></strong> or you can extract lists of features relevant for a specific linguistic field or a specific CEFR level here <strong><a href="#" onclick="document.forms['statistics'].submit();" class="reference">Statistics.</a></strong> -</p> -<p> </p> -<table border="0" cellspacing="1" cellpadding="0"> - <tr> - <td valign="top"><img src="img/aim-icon.png" width="30" height="30" alt="go"></td> - <td width="720" bgcolor="#CCCCCC"><p>Further links:<br> - <a href="#" onclick="document.forms['help-annis'].submit();" target="_blank" class="reference">advanced search output explanation</a><br> - <a href="C_download.php#annotations" target="_blank" class="reference">annotation scheme</a><br> - <a href="#bib" class="reference">references</a><br> - <a href="C_annotation.php#anchor4" target="_blank" class="reference">list with learner language features and examples</a></p></td> - </tr> -</table> -<p> </p> +<p>The <a href="C_download.php#annotations" target="_blank">annotation scheme</a> specifies to which group(s) the single learner language features belong.</p> +<p>Furthermore, most error-related MERLIN tags (EA1 & EA2) incorporate the widely used <strong>‘target language 
modification’</strong> dimension (cf. Díaz-Negrillo/Fernández-Domínguez 2006). This dimension specifies the type of error: an element might have been omitted, changed, added, repositioned, merged with, or split from another element). You can find details about this in the <a href="C_download.php#annotations" target="_blank">annotation scheme</a>. </p> + </div> <div id="anchor232"></div> -<blockquote> - <h4><a name="corecorpus"></a>2.3.2 Manual annotations in the MERLIN core corpus <a href="#anchor232" onClick="toggle('#content232','#img232')"><img id="img232" src="img/toggle-expand.png"></a></h4> -</blockquote> + <h4><a name="corecorpus"></a><a href="#anchor232" onClick="toggle('#content232','#img232')"><img id="img232" src="img/toggle-expand.png"></a> 2.3.2 Manual annotations in the MERLIN core corpus</h4> <div id="content232" class="content"> - <p> </p> <h5>The structure of the MERLIN core corpus</h5> - <p>For a small pilot sample (the <strong>MERLIN core corpus</strong>), in addition to grammar and orthography more linguistic dimensions are taken into consideration. The <strong>MERLIN core corpus</strong> consists of texts that received <a href="#reratings" class="reference">fair averages</a> of either A2 or B2. Thus, two groups of learners with a clearly distinct level of proficiency can be compared. It is important to notice that the ratings the learners received do not necessarily correspond to the CEFR level of the test they decided to take. You can distinguish between these dimensions here <em><strong><a href="#" onclick="document.forms['documents'].submit();" class="reference">Define a subcorpus</a> </strong></em>(“CEFR level of test” and “Overall CEFR rating”).</p> - <p>Many outperformed the targeted CEFR levels, while others’ performances were rated lower than the learners would have expected. An extreme case is Italian, where only two texts actually received a B2 level, while many more students took B2 tests. 
Here, the MERLIN core corpus incorporates the 100 texts that were placed highest on the Rasch logit scale (<a href="C_download.php#corpus" target="_blank" class="reference">technical report</a>). </p> + <p>For a small pilot sample (the <strong>MERLIN core corpus</strong>), in addition to grammar and orthography more linguistic dimensions are taken into consideration. The <strong>MERLIN core corpus</strong> consists of texts that received <a href="#reratings">fair averages</a> of either A2 or B2. Thus, two groups of learners with a clearly distinct level of proficiency can be compared. It is important to notice that the ratings the learners received do not necessarily correspond to the CEFR level of the test they decided to take. </p> + <p>Many outperformed the targeted CEFR levels, while others’ performances were rated lower than the learners would have expected. An extreme case is the Italian corpus, where only two texts actually received a B2 level, while many more students took B2 tests. Here, the MERLIN core corpus incorporates the 100 texts that were placed highest on the Rasch logit scale (<a href="C_download.php#corpus" target="_blank">technical report</a>). </p> <p><img src="img/annotations_GRAPHIC-layer_en2.png" width="529" height="200"></p> - <p> </p> - <h5>Core corpus: extended target hypotheses / target hypotheses 2 (TH2)  </h5> + <h5>Extended target hypotheses / target hypotheses 2 (TH2)  </h5> <p> Target hypotheses 2 aim at creating an acceptable version of the learner text. This process involves more subjectivity and difficulties of decision reliability, which is why it was separated from the level of target hypotheses 1 like in the Falko project with which there was a strong cooperation. The aim of TH2 is to capture the perspective of <strong>acceptability</strong> of the learner text (not, like for TH1, its correctness). TH2 therefore are an extension of TH1. 
To this aim, the learner text was still only minimally modified while at the same time its reconstruction comes close to what a native speaker utterance would look like. This reconstruction regards semantic and lexical aspects, pragmatics, and sociolinguistics. Other than in the TH1, phenomena that over-arch sentences and that are determined by the context are modified, too.</p> - <p>You can search for the TH2 in the <em><strong><a href="#" onclick="document.forms['simple'].submit();" class="reference">Simple search</a></strong></em> and in the <em><strong><a href="#" onclick="document.forms['advanced'].submit();" class="reference">Advanced search</a></strong></em>.</p> - <p> </p> - <h5>Core corpus: annotations of sociolinguistic, pragmatic, lexical, and other learner language features  (error annotation 2, EA2)</h5> - <p>For a part of the MERLIN core corpus, many tags from various linguistic perspectives were added to the grammatical and orthographical learner language features annotated in the main stage of the project. These tags stem from the same sources as the EA1 annotations (<a href="#ea1" class="reference">see 2.3.1</a>). </p> - <p>You can find detailed information about the single tags which include, for example, the speech act REQUEST, the use of language with an inappropriate level of formality, the use of structures that pertain to spoken language variants, or reference problems in the <a href="C_download.php#annotations" target="_blank" class="reference">annotation scheme</a>. 
You can get an overview of the annotated features and find examples <a href="C_annotation.php#anchor4" target="_blank" class="reference">in this table</a>.</p> + <h5>Annotations of sociolinguistic, pragmatic, lexical, and other learner language features  (error annotation 2, EA2)</h5> + <p>For a part of the MERLIN core corpus, many tags from various linguistic perspectives were added to the grammatical and orthographical learner language features annotated in the main stage of the project. These tags stem from the same sources as the EA1 annotations (<a href="#ea1">see 2.3.1</a>). </p> + <p>You can find detailed information about the single tags which include, for example, the speech act REQUEST, the use of language with an inappropriate level of formality, the use of structures that pertain to spoken language variants, or reference problems in the <a href="C_download.php#annotations" target="_blank">annotation scheme</a>. </p> <p>Again, the MERLIN tags incorporate the widely used ‘target language modification’ dimension (cf. DÃaz-Negrillo/Fernández-DomÃnguez 2006) which yields information about the type of the learner language feature (an element might have been omitted, changed, added, repositioned, merged with, or split from another element). </p> - <p>You can find these learner language features in the <em><strong>»</strong></em> <strong><em><a href="#" onclick="document.forms['advanced'].submit();" class="reference">Advanced search</a></em></strong>. You can compile a list of these features for a particular linguistic area or a specific CEFR level here <a href="#" onclick="document.forms['statistics'].submit();" class="reference"><strong><em>Statistics. 
</em></strong></a></p> - -<p> </p> -</div> + </div> <div id="anchor233"></div> -<blockquote> - <h4>2.3.3 Quality control aspects of the annotation process <a href="#anchor233" onClick="toggle('#content233','#img233')"><img id="img233" src="img/toggle-expand.png"></a></h4> -</blockquote> + <h4><a href="#anchor233" onClick="toggle('#content233','#img233')"><img id="img233" src="img/toggle-expand.png"></a> 2.3.3 Quality control aspects of the annotation process</h4> <div id="content233" class="content"> <p>It was important to make sure that the annotations in the MERLIN corpus are as <strong>consistent</strong> as possible, even if a certain degree of subjectivity is unavoidable. To this aim, the MERLIN project carried through a number of measures:</p> - <p> First of all, all instruments (TH 1 & TH2 rules, annotation scheme for EA1 and EA2) were <strong>piloted</strong> before their implementation. This allowed to detect possibly problematic aspects which could be corrected before the annotations started.</p> - <p> Secondly, all annotations are based on <strong>guidelines</strong> (<a href="C_download.php#annotations" target="_blank" class="reference">annotation manual</a>, <a href="http://www.linguistik.hu-berlin.de/institut/professuren/korpuslinguistik/forschung/falko/Falko-Handbuch_Korpusaufbau und Annotationen_v2.01" target="_blank" class="reference">Falko-Handbuch</a>). The guidelines were enriched by <strong>fine-grained decisions</strong> on single aspects of annotation (<a href="C_download.php#annotations" target="_blank" class="reference">documentation of annotation decisions</a>). </p> - <p> A third measure to control the quality of annotations is their <strong>documentation</strong>. Many decisions had to be taken about which tag to apply to what phenomenon, and consistency among the three project languages had to be taken care of. 
The most important discussions among the annotators are documented in the <a href="C_download.php#annotations" target="_blank" class="reference">documentation of annotation decisions</a>. In the <a href="C_download.php#annotations" target="_blank" class="reference">annotation scheme</a>, the ‘related tags’ sections mirror some of the extensive discussion processes. </p> - <p>Last but not least, the reliability of the annotations was controlled also a little bit more formally. <strong>Re</strong><strong>liability</strong> of annotations was controlled for 5% of the texts on each test level for target hypotheses (1 & 2) and error annotation (1 & 2). Different methods were applied: </p> - <blockquote> - <p> In a <strong>qualitative</strong> approach, half of the files were annotated independently by the coders to then be commonly discussed with the aim to arrive at a <strong>consensus</strong>. This happened before the annotation (which was done level by level) of the level started. The texts served as a reference throughout the annotation process. </p> - <blockquote> - <p> The second half of the files checked for reliability was annotated by all coders without their knowledge. This <strong>quantitative</strong>, <strong>double-blind procedure</strong> allows to check for intra-coder reliability (the consistency of one and the same annotator) and inter-coder reliability (the degree of agreement between different annotators). </p> - <p>Although EA2 annotations underwent these quality control measures as well, -they are of an explorative pilot character. Therefore, it would be -desirable to have these annotations completely re-checked before extending -the annotations to the complete MERLIN database. 
In the meantime, users are -asked to analyse EA2 annotations with caution.</p> - <p> </p> - <h5>Consistency and interference of annotation layers </h5> + <ul> + <li> All instruments (TH1 & TH2 rules, annotation scheme for EA1 and EA2) were <strong>piloted</strong> before their implementation. This made it possible to detect potentially problematic aspects which could be corrected before the annotations started.</li> + <li> All annotations are based on <strong>guidelines</strong> (<a href="C_download.php#annotations" target="_blank">annotation manual</a>, <a href="http://www.linguistik.hu-berlin.de/institut/professuren/korpuslinguistik/forschung/falko/Falko-Handbuch_Korpusaufbau%20und%20Annotationen_v2.01" target="_blank" class="reference">Falko-Handbuch</a>). The guidelines were enriched by <strong>fine-grained decisions</strong> on single aspects of annotation. </li> + <li>The quality of the annotations was assured by comprehensive documentation of annotation decisions, also to guarantee the consistency of the annotations for the three project languages.</li> + </ul> + <p>Last but not least, the reliability of annotations was controlled for 5% of the texts on each test level for target hypotheses (1 & 2) and error annotation (1 & 2). Different methods were applied: </p> + <ul> + <li> In a <strong>qualitative</strong> approach, half of the files were annotated independently by the coders to then be jointly discussed with the aim to arrive at a <strong>consensus</strong>. This happened before the annotation (which was done level by level) of the level started. The texts served as a reference throughout the annotation process. </li> + <li> The second half of the files checked for reliability was annotated by all coders without their knowledge.
This <strong>quantitative</strong>, <strong>double-blind procedure</strong> allows to check for intra-coder reliability (the consistency of one and the same annotator) and inter-coder reliability (the degree of agreement between different annotators).</li> + </ul> + <p>Although EA2 annotations underwent these quality control measures as well, + they are of an explorative pilot character. Therefore, users are + asked to analyse EA2 annotations with caution.</p> + <h5>Consistency and interference of annotation layers </h5> <div> - <div> </div> + <div></div> </div> - </blockquote> - </blockquote> <p>From a technical perspective, it was complex to integrate and harmonize the different annotation formats in MERLIN without losing information or creating imprecisions. <br> At the same time, on a content level, contradictions between the different annotation levels (TH1-EA1-TH2-EA2) were to be avoided.<br> - TH1 and EA1 are closely connected. If there is a change of the learner text on TH1, there ought to be a tag on EA1 that makes the learner language feature explicit in detail. There are single exceptions to this rule which are documented in the <a href="C_download.php#annotations" target="_blank" class="reference">documentation of annotation decisions</a>. <br> - Also, all EA2 annotations are reflected in TH2. The opposite, however, is not necessarily true: There might be TH2 modifications that are needed to arrive at an acceptable version of the learner text and that are not part of the <a href="C_download.php#annotations" target="_blank" class="reference">MERLIN annotation scheme</a>. The MERLIN team might have not included a phenomenon if it was not considered relevant and/or feasible. </p> + TH1 and EA1 are closely connected. If there is a change of the learner text on TH1, there ought to be a tag on EA1 that makes the learner language feature explicit in detail. <br> + Also, all EA2 annotations are reflected in TH2. 
The opposite, however, is not necessarily true: There might be TH2 modifications that are needed to arrive at an acceptable version of the learner text and that are not part of the <a href="C_download.php#annotations" target="_blank">MERLIN annotation scheme</a>. The MERLIN team might not have included a phenomenon if it was not considered relevant and/or feasible. </p> <div> <div> </div> <div> </div> </div> </div> <div id="anchor234"></div> -<blockquote> - <h4>2.3.4 Automatic annotations in MERLIN <a href="#anchor233" onClick="toggle('#content234','#img234')"><img id="img234" src="img/toggle-expand.png"></a></h4> -</blockquote> -<div id="content234" class="content"></p> - -<p>In MERLIN, a combination of automatic and manual [link] annotation + <h4><a href="#anchor234" onClick="toggle('#content234','#img234')"><img id="img234" src="img/toggle-expand.png"></a> 2.3.4 Automatic annotations in MERLIN</h4> +<div id="content234" class="content"> + <p>In MERLIN, a combination of automatic and manual annotation procedures was used in order to prepare learner texts for integration into the platform. 
We have applied existing automatic annotation tools developed for the target languages in order to expand the range of @@ -191,22 +134,19 @@ asked to analyse EA2 annotations with caution.</p> learner language, since learner language often deviates considerably from the target language across all levels of linguistic analysis, from spelling to semantics.</p> -<p> </p> <h5>The following tools were used for all three MERLIN languages:</h5> -<p>Texts were tokenized using the <a href="http://alias-i.com/lingpipe/docs/api/com/aliasi/tokenizer/IndoEuropeanTokenizerFactory.htm" target="_blank" class="reference">tokenizer for Indo-European +<p>Texts were tokenized using the <a href="http://www.alias-i.com/lingpipe/docs/api/com/aliasi/tokenizer/IndoEuropeanTokenizerFactory.html" target="_blank" class="reference">tokenizer for Indo-European languages</a> from LingPipe and the resulting tokenization was then corrected by hand. <br> -Sentences were annotated with the <a href="https://opennlp.apache.org/documentation/1.5.3/manual/opennlp.html#tools.sentdetect" target="_blank" class="reference">OpenNLP sentence +Sentences were annotated with the <a href="https://opennlp.apache.org/docs/1.9.4/manual/opennlp.html#tools.sentdetect" target="_blank" class="reference">OpenNLP sentence segmenter</a>.<br> Repetitions were identified using the <a href="https://code.google.com/p/saphre" target="_blank" class="reference">Saphre library</a> on the basis of the automatic part-of-speech and lemma annotation described below.</p> -<p> </p> <h5>Language-Specific Tools</h5> <p>MERLIN contains part-of-speech tags (tok_pos), lemmas (tok_lemma), and dependency parses (dependencies) for all three languages. Additional part-of-speech tags, lemmas, and morphological analyses from alternate tools are included where available. 
Details about the annotation tools and annotation schemes are provided for each language individually below.</p> -<p> </p> <table border="0" cellspacing="2" cellpadding="0"> <tr> <td rowspan="4" valign="top" bgcolor="#CCCCCC"><p>CZECH</p></td> @@ -285,43 +225,34 @@ and annotation schemes are provided for each language individually below.</p> included as tok_pos_bohnet and tok_morph_bohnet.</p></td> </tr> </table> -<p> </p> </div> -<p> </p> -<div id="anchor3"></div> -<h2>3. Using MERLIN for research purposes</strong> <a href="#anchor3" onClick="toggle('#content3','#img3')"><img id="img3" src="img/toggle-expand.png"></a></h2> +<h2><a href="#anchor3" onClick="toggle('#content3','#img3')"><img id="img3" src="img/toggle-expand.png"></a> 3. Using MERLIN for research purposes</h2> <div id="content3" class="content"> -<p>The main aim of MERLIN is not research-oriented: the platform was developed for practitioners who need empirical illustrations of rated CEFR levels for Czech, Italian, and German. Recently, an increasing number of initiatives (like <a href="http://www.slate.eu.org/" target="_blank" class="reference">SLATE</a>) have started to collect authentic learner language rated according to CEFR levels. Some of them pertain to the <em>Reference Level Descriptions</em> (RLD) initiative, i.e. a specification of the CEFR levels for single languages (the most prominent example is the <a href="http://www.englishprofile.org/" target="_blank" class="reference">English Profile Project</a>, other projects are ASK for Norwegian, Carlsen 2013, or the Profilo della lingua italiana, Spinelli/Parizzi 2010). 
The Council of Europe encourages the development of RLDs (CoE 2005, see <a href="http://www.coe.int/t/dg4/linguistic/cadre1_en.asp" title="CoE website for RLD" target="_blank" class="reference">CoE website for Reference Level Descriptions</a>).<br> -From corpora like these, features that characterize CEFR levels (sometimes called “criterial features”, Hawkins/FilipovÃc 2012) can be extracted. This process helps to deepen the understanding of what CEFR-related ratings mean and to build its use on firmer, empirical grounds. MERLIN contributes to the empirically-based exploration of the CEFR for German, Italian, and Czech. It differs from most existing initiatives in that all data, including full texts, test tasks and annotations, are fully and freely available online.<br> +<p>The main aim of MERLIN is not research-oriented: the platform was developed for practitioners who need empirical illustrations of rated CEFR levels for Czech, Italian, and German. An increasing number of initiatives have started to collect authentic learner language rated according to CEFR levels. Some of them pertain to the <em>Reference Level Descriptions</em> (RLD) initiative, i.e. a specification of the CEFR levels for single languages (the most prominent example is the <a href="http://www.englishprofile.org/" target="_blank" class="reference">English Profile Project</a>, other projects are <em>ASK</em> for Norwegian, Carlsen 2013, or the <em>Profilo della lingua italiana,</em> Spinelli/Parizzi 2010). The Council of Europe encourages the development of RLDs (CoE 2005, see <a href="http://www.coe.int/t/dg4/linguistic/cadre1_en.asp" title="CoE website for RLD" target="_blank" class="reference">CoE website for Reference Level Descriptions</a>).<br> +From corpora like these, features that characterize CEFR levels (sometimes called <em>criterial features</em>, Hawkins/Filipović 2012) can be extracted. 
This process helps to deepen the understanding of what CEFR-related ratings mean and to build its use on firmer, empirical grounds. MERLIN contributes to the empirically-based exploration of the CEFR for German, Italian, and Czech. It differs from most existing initiatives in that all data, including full texts, test tasks and annotations, are fully and freely available online.<br> Apart from this major practical aim<strong>, </strong>MERLIN is relevant for research purposes from various perspectives: </p> -<p> </p> </div> <div id="anchor31"></div> -<h3><a name="scale-valid"></a>3.1 Validating CEFR scales with MERLIN</h3> - <a href="#anchor31" onClick="toggle('#content31','#img31')"><img id="img31" src="img/toggle-expand.png"></a> +<h3><a name="scale-valid"></a><a href="#anchor31" onClick="toggle('#content31','#img31')"><img id="img31" src="img/toggle-expand.png"></a> 3.1 Validating CEFR scales with MERLIN</h3> <div id="content31" class="content"> <p>The Council of Europe effort of scaling the CEFR descriptors (CoE 2001; North 2000; Schneider/North 2000) has led to immense improvements in standardization and transparency in language learning, teaching, and testing. Important decisions about language learners' lives are taken with reference to the CEFR levels. In many ways, it seems as if the scales have acquired a life of their own; often, they are over-estimated, misunderstood and applied in ways that they were not meant to be used for (North 2000). One crucial aspect that is yet insufficiently understood is the empirical validity of the CEFR scales (Fulcher 2004; Hulstijn 2007): If scales are used to describe or rate learner language, they must reflect what learners actually do (Alderson 1991). -In spite of this, up to date there is almost no research that examines the power of the CEFR descriptors to capture the language learners actually produce (Wisniewski 2014). 
MERLIN allows to directly analyze the relationship between selected CEFR descriptors (such as "circumlocutions" or "content jumps" which were operationalized and annotated (see <a href="C_download.php#annotations" target="_blank" class="reference">MERLIN annotation scheme</a>) and learner language without having to rely on ratings. </p> +In spite of this, up to date there is almost no research that examines the power of the CEFR descriptors to capture the language learners actually produce (Wisniewski 2014). MERLIN allows to directly analyze the relationship between selected CEFR descriptors (such as "circumlocutions" or "content jumps" which were operationalized and annotated (see <a href="C_download.php#annotations" target="_blank">MERLIN annotation scheme</a>) and learner language without having to rely on ratings. </p> </div> <div id="anchor32"></div> -<h3>3.2 MERLIN and second language acquisition studies</h3> -<a href="#anchor32" onClick="toggle('#content32','#img32')"><img id="img32" src="img/toggle-expand.png"></a> +<h3><a href="#anchor32" onClick="toggle('#content32','#img32')"><img id="img32" src="img/toggle-expand.png"></a> 3.2 MERLIN and second language acquisition studies</h3> <div id="content32" class="content"> <p>Many studies from the area of second language acquisition (SLA) refer to proficiency levels when describing the development and the variation of learner language. However, in many cases the proficiency classification is not yet based on procedures that comply with the strict standards that need to be met from the perspective of research-based, high-quality language testing (see for example AERA/APA/NCME; ALTE 2001; Bachman/Palmer 1996; <a href="http://www.ealta.eu.org/documents/archive/guidelines/English.pdf" target="_blank" class="reference">EALTA code of practice</a>). 
There is a particular lack of strict testing procedures and easily accessible empirical data for languages other than English when it comes to CEFR-based proficiency classifications. Although MERLIN is small in size, its reliable relationship to the CEFR makes it a precious resource for future SLA studies. Also, it can be used for triangulating and validating data for many existing studies. </p> </div> <div id="anchor33"></div> -<h3>3.3 MERLIN to advance NLP of learner language</h3> - <a href="#anchor33" onClick="toggle('#content33','#img33')"><img id="img33" src="img/toggle-expand.png"></a> +<h3><a href="#anchor33" onClick="toggle('#content33','#img33')"><img id="img33" src="img/toggle-expand.png"></a> 3.3 MERLIN to advance NLP of learner language</h3> <div id="content33" class="content"> <p>The MERLIN corpus provides valuable data for the development and evaluation of natural language processing tools for learner language (Meurers 2012). The corpus and its meta-information on learners and ratings readily support research on automatic native language identification, enabling such research to go beyond the current English learner focus. In a similar vein, the corpus has already been used for research on automatic proficiency classification for German (Hancke 2013). The MERLIN corpus also provides richly annotated learner data for the development and adaptation of NLP tools and applications that assist language learners in improving their vocabulary usage, coherence, spelling and grammatical accuracy. 
</p> </div> -<p> </p> - <div id="Pub"></div> -<h2><a name="bib"></a>References <a href="#Pub" onClick="toggle('#contentPub','#imgPub')"><img id="imgPub" src="img/toggle-expand.png"></a></h2> +<h2><a name="bib"></a><a href="#Pub" onClick="toggle('#contentPub','#imgPub')"><img id="imgPub" src="img/toggle-expand.png"></a> References</h2> <div id="contentPub" class="content"> <p>[ALTE 2001] = ALTE Working Group on the Code of Practice: <em>Principles of Good Practice for ALTE Examinations. </em>Revised Draft. <a href="http://www.testdaf.de/institut/pdf/ALTE/ALTE_good_practice.pdf" target="_blank" class="reference">http://www.testdaf.de/institut/pdf/ALTE/ALTE_good_practice.pdf</a>, October 2013.<br> [Consiglio d'Europa 2004a] = Trim, J./North, B./Coste, D.: <em>Quadro comune europeo di riferimento per le lingue: apprendimento, insegnamento, valutazione</em>. La Nuova Italia: Oxford.- A cura del Consiglio d'Europa.<br> diff --git a/en/teacher.php b/en/teacher.php index 134732a..a66d68e 100644 --- a/en/teacher.php +++ b/en/teacher.php @@ -7,133 +7,151 @@ require('F_mainsidebar.php'); <div id="mainpart3"> <div id="content-menu3"> <!--INSERT--> -<h1>MERLIN for CEFR-related language learning, teaching, and testing</h1> -<h2>MERLIN showcase</h2> -<p>Introducing MERLIN for <strong>teachers</strong> | <a href="http://youtu.be/J9E1aGJ0QV8" target="_blank" class="reference">video presentation at MERLIN workshop in Linz</a>, 12/2014<br> -➜ Download usage scenarios: where are my learners | data-driven learning | material development: <a href="docs/MERLIN_Usage-scenarios_Teachers_CZE.zip" target="_blank" class="reference">Czech</a> <img src="img/document-pdf.png" alt="pdf" width="16" height="16"> | <a href="docs/MERLIN_Usage-scenarios_Teachers_GER.zip" target="_blank" class="reference">German</a> <img src="img/document-pdf.png" alt="pdf" width="16" height="16"> | <a href="docs/MERLIN_Usage-scenarios_Teachers_ITA.zip" target="_blank" class="reference">Italian</a> <img 
src="img/document-pdf.png" alt="pdf" width="16" height="16"></p> -<p>Introducing MERLIN for <strong>testers and textbook authors</strong> | <a href="http://youtu.be/Z1TFFja2ZHU" target="_blank" class="reference">video presentation at MERLIN workshop in Linz</a>, 12/2014<br> -➜ Download usage scenarios: MERLIN and the CEFR | language tests, MERLIN and the CEFR | benchmarking with MERLIN | textbook analysis: <a href="docs/MERLIN_Usage-scenarios_testers-textbookauthors_CZE.zip" target="_blank" class="reference">Czech</a> <img src="img/document-pdf.png" alt="pdf" width="16" height="16"> | <a href="docs/MERLIN_Usage-scenarios_testers-textbookauthors_GER.zip" target="_blank" class="reference">German</a> <img src="img/document-pdf.png" alt="pdf" width="16" height="16"> | <a href="docs/MERLIN_Usage-scenarios_testers-textbookauthors_ITA.zip" target="_blank" class="reference">Italian</a> <img src="img/document-pdf.png" alt="pdf" width="16" height="16"></p> -<p> </p> -<h2>Using MERLIN for language teaching</h2> -<div id="anchor3"></div> -<h3>MERLIN in the language classroom</h3> -<a href="#anchor3" onClick="toggle('#content3', '#img3')"><img id="img3" src="img/toggle-expand.png"></a> -<p><strong>Make your students understand CEFR levels. -</strong> -<div id="content3" class="content"> -<p>You can prepare your subcorpus of MERLIN texts (e.g., sorted according to CEFR ratings) and bring it to your language classroom. Your learners can discuss strengths and weaknesses of written productions. </p> -<p> </p> -<p><strong>Make your students understand their own L2 competence with relation to CEFR levels: </strong><br> - Your learners can use the <a href="C_download.php#corpus" target="_blank" class="reference">MERLIN rating grid</a> for self-evaluation, they can do one or more <a href="C_mcorpus.php#anchor3" target="_blank" class="reference">MERLIN tasks</a>, and they can compare their performances to the subcorpus you prepared. 
Thus, they can more easily understand where they are in their language learning process as well. This might be more appropriate for learners from B1.</p> -<p> </p> -<table border="0" cellpadding="1" cellspacing="0"> - <tr> - <td valign="top"><img src="img/aim-icon.png" alt="aim" width="30" height="30"></td> - <td width="720" bgcolor="#CCCCCC"><p>To find written test of learners that performed on a specific CEFR-level: <span class="example"><br> - </span><span class="Stil5">Go to <a href="#" onclick="document.forms['documents'].submit();" class="reference">Define a subcorpus</a> to filter e.g. for Italian texts rated B1 and B2 on the topic "describe experiences with language learning”</span>.</p></td> - </tr> -</table> -<p> </p> +<h1>Language learning, teaching, and testing with MERLIN</h1> +<p>Here, we present several use cases for the MERLIN corpus. The scenarios relate to teaching practice, but also to the design of teaching materials as well as to the planning of courses and textbooks. Testers will find some practical examples in the last section.</p> +<h2 dir="ltr">1 MERLIN for teachers and material writers</h2> +<div id="anchor1"></div> +<h3 dir="ltr"><a href="#anchor1" onClick="toggle('#content1','#img1')"><img id="img1" src="img/toggle-expand.png"></a> Integrate MERLIN texts into the classroom </h3> +<div id="content1" class="content"> +<p dir="ltr"><strong>Objective</strong>: Your students assess their own L2 writing competence with relation to CEFR levels and thus better understand where they are in the language learning process.</p> -<p><strong>Bring the platform to the classroom:</strong><br> - You can also let your (advanced) students look for language phenomena in the MERLIN corpus by themselves in order to familiarize them with the technology and enhance their autonomy in language learning. They could do peer-group error analyses of MERLIN samples, but also of texts of their own. 
You could have them compare MERLIN data with a native speaker corpus to illustrate differences in language use. </p> -<p> </p> +<p dir="ltr"><strong>Scenario</strong>: Choose MERLIN texts that match the CEFR level of your learners or a specific test task that fits thematically with the course content.</p> +<p dir="ltr">Your learners ...</p> +<ul> + <li dir="ltr" aria-level="1"> + discuss strengths and weaknesses of an example text from the MERLIN corpus and compare it with the target hypothesis. </li> + <li dir="ltr" aria-level="1"> + work on a <a href="C_mcorpus.php#tasks" target="_blank">MERLIN task</a> and compare their own productions with the MERLIN texts. </li> + <li>evaluate each other's text productions with the help of the <a href="C_download.php#corpus" target="_blank" class="a.reference">MERLIN rating grid</a> (recommended from level B1).<br /> + </li> +</ul> +<p dir="ltr"><strong>And here's how:</strong></p> +<ol> + <li dir="ltr" aria-level="1"> + Download a suitable <strong>↘</strong> <a href="C_mcorpus.php#tasks" target="_blank">task</a>. </li> + <li dir="ltr" aria-level="1"> + Download the <a href="https://clarin.eurac.edu/repository/xmlui/bitstream/handle/20.500.12124/6/merlin-text-v1.1.zip" target="_blank" class="a.reference">Merlin corpus</a> and choose <em><strong> ↘ meta_ltext_TH</strong></em> (learner texts including target hypotheses). </li> + <li>Search for texts with this task [<em><strong>↘</strong></em> <em><strong><a href="\start.php#anchor3" target="_blank">Filter texts with the file manager</a></strong></em>].</li> +</ol> </div> -<p> </p> <div id="anchor2"></div> -<h3>“Hands-on” for material writers</h3> -<a href="#anchor2" onClick="toggle('#content2', '#img2')"><img id="img2" src="img/toggle-expand.png"></a> -<p><strong>Explore crucial aspects of language learning, such as learners' use of collocations, verbal aspect, and mood, etc. 
and find suitable examples for your own materials.</strong></p> +<h3 dir="ltr"><a href="#anchor2" onClick="toggle('#content2','#img2')"><img id="img2" src="img/toggle-expand.png"></a> Assess the learning progress of your learners</h3> <div id="content2" class="content"> -<p>You can then use data from the corpus to add usage notes to your materials, e.g. hints on correct use of a structure or suggestions to avoid the overuse or underuse of words or structures.</p> -<p> </p> -<table border="0" cellpadding="1" cellspacing="0"> - <tr> - <td valign="top"><span class="Stil5"><img src="img/aim-icon.png" alt="aim" width="30" height="30" align="top"></span></td> - <td width="720" valign="top" bgcolor="#CCCCCC"><p>To find examples for the wrong usage of a specific structure, e.g. verbal aspect in Czech, by native speakers of German: <br> - <span class="Stil5">1. Define and save a subcorpus of Czech texts written by authors with L1=German <a href="#" onclick="document.forms['documents'].submit();" class="reference">Define a subcorpus</a> <br> -2. Make a frequency list of verbal aspect errors in Czech for your subcorpus <strong><a href="#" onclick="document.forms['statistics'].submit();" class="reference">Statistics</a></strong></span></p></td> - </tr> - <tr> - <td valign="top"> </td> - <td width="720" valign="top" bgcolor="#CCCCCC"><p>To search for a word in learner texts and explore how learners use it, and which errors are related to it: <br> - <span class="Stil5">Use the <a href="#" onclick="document.forms['simple'].submit();" class="reference">Simple search</a>, type e.g. 
"Wohnung" in the search field and choose "Search in target hypothesis".</span></p></td> - </tr> - <tr> - <td> </td> - <td width="720" valign="top" bgcolor="#CCCCCC"><p>To explore the use of a specific structure: <br> - <span class="Stil5">Use the <a href="#" onclick="document.forms['advanced'].submit();" class="reference">Advanced search</a> to search for all instances of the verb “warten” (lemma 1) followed by a preposition (POS 2 = preposition)</span></p></td> - </tr> -</table> -<p> </p> -<p>Many teaching materials, including the vast majority of textbooks, claim to be related to the CEFR, but they do not make use of authentic learner language data. In addition, often learners proficiency comes in a profile, so that a learner might be more successful in grammar than, for example, in vocabulary.</p> - -<p><strong>Use MERLIN to explore </strong>these<strong> different aspects of learners' communicative L2 competence</strong>, e.g. vocabulary range/control, grammatical accuracy, coherence/cohesion, on different CEFR levels and develop your own materials tailored to your students.</p> -<p> </p> -<table border="0" cellpadding="1" cellspacing="0"> - <tr> - <td valign="top"><span class="Stil5"><img src="img/aim-icon.png" alt="aim" width="30" height="30" align="top"></span></td> - <td width="720" valign="top" bgcolor="#CCCCCC"><p>To get an impression of what texts with a CEFR-related rating of these dimensions of language proficiency look like: <span class="Stil5">Create a subcorpus of texts with vocabulary control | grammatical accuracy | coherence and cohesion rated B2 <a href="#" onclick="document.forms['documents'].submit();" class="reference">Define a subcorpus</a></span></p></td> - </tr> -</table> -<p> </p> +<p dir="ltr"><strong>Objective</strong>: Compare your group with learners at the same or different competence levels</p> +<p dir="ltr"><strong>Scenario</strong>: Use the MERLIN texts to check ...</p> +<ul> + <li dir="ltr" aria-level="1"> what difficulties other 
learners at the same competence level are having</li> + <li dir="ltr" aria-level="1"> where your learners are in comparison to learners with the same or a different L1.</li> +</ul> +<p dir="ltr"><strong>And here's how:</strong></p> +<ol> + <li dir="ltr" aria-level="1"> Download the <a href="https://clarin.eurac.edu/repository/xmlui/bitstream/handle/20.500.12124/6/merlin-text-v1.1.zip" target="_blank" class="a.reference">Merlin corpus</a> and choose <strong>↘</strong><strong><em> meta_ltext_TH</em></strong> (learner texts including target hypotheses).</li> + <li>Search in your file manager for texts at a specific level, e.g. B1, and with a specific mother tongue, e.g. Russian [<em><strong>↘ entrance page↘</strong></em> <em><strong>Filter texts with the file manager</strong></em>]. </li> +</ol> +<p><img src="img/hint_bulb.png" alt="hint bulb" /><span class="StilSmall"> You can use MERLIN to explore different aspects of learners' communicative L2 competence, e.g. vocabulary range/control, grammatical accuracy, coherence/cohesion, at different CEFR levels. Learners' proficiency often comes as a profile, so that a learner might be more successful in grammar than, for example, in vocabulary.</span></p> </div> -<div id="anchor1"></div> -<h3>Syllabus and curriculum development </h3> <a href="#anchor1" onClick="toggle('#content1', '#img1')"><img id="img1" src="img/toggle-expand.png"></a> -<p>Most syllabi, curricula and even national educational standards in Europe refer to the CEFR. ... </p> -<div id="content1" class="content"> -<p>Nevertheless, often it is not well understood what learner language on these levels is like. </p> -<p> <strong>MERLIN helps you to concretely identify typical & relevant milestones / errors in learner language with reference to CEFR levels.</strong> It can thus support decisions about the selection and progression of syllabus / curriculum contents. 
</p> -<p> </p> -<table border="0" cellpadding="1" cellspacing="0"> - <tr> - <td valign="top"><img src="img/aim-icon.png" alt="aim" width="30" height="30"></td> - <td width="720" bgcolor="#CCCCCC"><p>To get a general impression of what B1 texts look like: <span class="example"><br> - </span><span class="Stil5">Create your own corpus of texts extracted from Italian tests rated B1</em> <a href="#" onclick="document.forms['documents'].submit();" class="reference">Define a subcorpus</a></span></p></td> - </tr> - <tr> - <td> </td> - <td width="720" bgcolor="#CCCCCC"><p>To find out typical problems learners have on a specific CEFR level: <br> - <span class="Stil5">Compile a list of frequent learner language features, e.g. grammatical errors <strong><a href="#" onclick="document.forms['statistics'].submit();" class="reference">Statistics</a></strong></span><br> - </p></td> - </tr> -</table> -<p> </p> +<div id="anchor3"></div> +<h3 dir="ltr"><a href="#anchor3" onClick="toggle('#content3','#img3')"><img id="img3" src="img/toggle-expand.png"></a> Examine features, irregularities and errors in your learner's texts </h3> +<div id="content3" class="content"> +<p dir="ltr"><strong>Objective</strong>: Check incorrect use, overuse or underuse of words and structures </p> +<p dir="ltr"><strong>Scenarios</strong>:</p> +<ul> + <li dir="ltr" aria-level="1">Errors in the use of a particular structure appear repeatedly in your learners' texts, e.g. <em>mít rád</em> + 'accusative object' in Czech. You want to examine whether this phenomenon is typical and in which contexts it occurs.</li> + <li dir="ltr" aria-level="1">You suggest that there is a correlation between the overuse or inappropriate use of a structure/lexical unit and the learner's mother tongue. For example, you observe the overuse of <em>schon </em>(<em>already, yet</em>) in learners with Polish as their L1 in German. 
You want to verify your assumption.</li> +</ul> +<p dir="ltr"><strong>And here's how:</strong></p> +<ol> + <li dir="ltr" aria-level="1">Open the <a href="https://merlin-platform.eu/annis/" target="_blank" class="a.reference">ANNIS search</a>.</li> + <li dir="ltr" aria-level="1">Enter the word or lemma you are looking for ["schon"].</li> + <li dir="ltr" aria-level="1">Choose the corpus [Czech, German, Italian] and click <strong>↘</strong> <em><strong>Search</strong></em>.</li> + <li dir="ltr" aria-level="1">To search only texts written by Polish L1 speakers, enter additionally into the search field: <em><strong>& meta::_author_L1="Polish"</strong></em> .</li> +</ol> +<p><img src="img/hint_bulb.png" alt="hint bulb" /><span class="StilSmall"> For further explanations see <a href="#" onclick="document.forms['glossary'].submit();"><?php echo $trans['help_search'][$_SESSION['lang']];?></a></span>.</p> </div> -<p> </p> <div id="anchor4"></div> -<h2>Using MERLIN for language testing <a href="#anchor4" onClick="toggle('#content4', '#img4')"><img id="img4" src="img/toggle-expand.png"></a></h2> +<h3 dir="ltr"><a href="#anchor4" onClick="toggle('#content4','#img4')"><img id="img4" src="img/toggle-expand.png"></a> Find examples in the MERLIN corpus for teaching materials </h3> <div id="content4" class="content"> -<p>Most European language tests are (or claim to be) related to the CEFR. While the Council of Europe provides numerous <a href="http://www.coe.int/t/dg4/linguistic/cadre1_en.asp" target="_blank" class="reference">helpful materials</a>, there is not yet much empirical data (i.e. CEFR-related language samples) to support the test development process, especially for languages other than English (for English, see <a href="http://www.englishprofile.org/" target="_blank" class="reference">www.englishprofile.org</a>). </p> - - <p>We believe the MERLIN data help to enhance transparency and quality in test construction. 
MERLIN is useful for familiarization with the CEFR, and it can be used for benchmarking purposes. It can be used for empirically based development of assessment materials.</p> -<p> Furthermore, MERLIN data lends itself to the empirical validation of the CEFR scales (see <a href="C_research.php" class="reference">MERLIN for research</a>) and might be helpful for empirically based rating scale construction.</p> - <p><strong>You can use MERLIN in your institutions to create a common understanding of the CEFR levels and to practice rating procedures of written texts.</strong></p> - <p> </p> - <table border="0" cellpadding="1" cellspacing="0"> - <tr> - <td valign="top"><img src="img/aim-icon.png" alt="aim" width="30" height="30"></td> - <td bgcolor="#CCCCCC"><p>To extract a random sample of written tests on a specific tasks: <span class="example"><br> - </span><span class="Stil5">Go to <a href="#" onclick="document.forms['documents'].submit();" class="reference">Define a subcorpus</a> and filter for tests on a specific task topic, e.g. "andare a rovare un amico"</span></span></p></td> - </tr> - <tr> - <td> </td> - <td bgcolor="#CCCCCC"><p>To adjust the rating behavior among your teacher colleagues: <br> - <span class="Stil5">Have the example texts re-rated by your colleagues using the<strong><a href="C_download.php#corpus" target="_blank"> MERLIN rating grid</a>. </strong>The results can be discussed in the group and they can be compared to the MERLIN ratings.</span><br> - </p></td> - </tr> - </table> - <p> </p> -<h3>Links </h3> -<p>Council of Europe (2011). <a href="http://www.coe.int/t/dg4/linguistic/Cadre1_en.asp" target="_blank" class="reference">Common European Framework of Reference for: Learning, Teaching, Assessment</a>. 
Council of Europe.</p> -<p>The English profile: <a href="http://www.englishprofile.org/" target="_blank" class="reference">www.englishprofile.org</a></p> -<p>Council of Europe materials supporting the use of the CEFR: <a href="http://www.coe.int/t/dg4/linguistic/Cadre1_en.asp " title="http://www.coe.int/t/dg4/linguistic/Cadre1_en.asp " target="_blank" class="reference">http://www.coe.int/t/dg4/linguistic/Cadre1_en.asp </a></p> - -<p> </p> +<p dir="ltr"><strong>Objective</strong>: Explore crucial aspects of language learning, such as learners' use of collocations, verbal aspect, and mood, etc. and find suitable examples for your own materials.</p> +<p dir="ltr"><strong>Scenarios</strong>:</p> +<ul> + <li dir="ltr" aria-level="1">You want to illustrate the command of word order in the German main clause. </li> + <li dir="ltr" aria-level="1">You search for examples of incorrect use of a certain structure to add usage notes to your materials or suggestions to avoid its overuse or underuse. </li> + <li dir="ltr" aria-level="1">You search for typical errors related to the use of a certain word or structure to identify practice focuses and find example sentences.</li> + <li dir="ltr" aria-level="1">You want to analyze and compare texts with regard to certain phenomena of learner language.</li> +</ul> +<p dir="ltr"><strong>And here's how:</strong></p> +<ol> + <li dir="ltr" aria-level="1">Review in the <a href="#annotations" onclick="document.forms['glossary'].submit();"><?php echo $trans['help_search'][$_SESSION['lang']];?></a> in section 2 <strong>↘</strong> <strong><em>the list of annotated learner language features</em></strong> for the ones that interest you.</li> + <li dir="ltr" aria-level="1">Open the <a href="https://merlin-platform.eu/annis/" target="_blank" class="a.reference">ANNIS search</a> to search for those features, e.g. <em>G_Refl_type</em>.
Proceed as described here <a href="#" onclick="document.forms['glossary'].submit();"><?php echo $trans['help_search'][$_SESSION['lang']];?></a>.</li> + <li dir="ltr" aria-level="1">Now view the search results. Under <strong>↘</strong> <em><strong> full text</strong></em> you can explore the feature in context and copy phrases and sentences from the learner's text.</li> +</ol> +</div> +<h2 dir="ltr">2 MERLIN for curriculum design and course planning</h2> +<div id="anchor5"></div> +<h3 dir="ltr"><a href="#anchor5" onClick="toggle('#content5','#img5')"><img id="img5" src="img/toggle-expand.png"></a> General overview of the text productions at a certain CEFR level </h3> +<div id="content5" class="content"> +<p dir="ltr"><strong>Objective / Scenario</strong>: You want to get a general overview of the text productions at a certain CEFR level.</p> +<p dir="ltr"><strong>And here's how</strong>: To get a general impression of e.g. B1 texts, sort or filter the MERLIN texts by files that received a B1 rating (fair average). Proceed as described on the <strong>↘</strong> <em><strong>entrance page</strong></em> under <strong>↘</strong> <em><strong>Filter texts with the file manager</strong></em>.</p> </div> -<!--INSERT END--> +<div id="anchor6"></div> +<h3 dir="ltr"><a href="#anchor6" onClick="toggle('#content6','#img6')"><img id="img6" src="img/toggle-expand.png"></a> Identify relevant milestones in learner language</h3> +<div id="content6" class="content"> +<p dir="ltr"><strong>Objective</strong>: Identify relevant milestones of the L2 or typical errors relating them to the levels of the CEFR and facilitate decisions about the selection and sequencing of content in the curriculum and syllabus.</p> +<p dir="ltr"><strong>Scenarios</strong></p> +<ul> + <li dir="ltr" aria-level="1">You want to examine which phenomena of learner language are especially relevant to the addressed CEFR level of the textbook or to the course level.
You could do so by exploring MERLIN texts for phenomena that learners typically implement incorrectly at a certain CEFR level, but do apparently master at the level above.</li> + <li dir="ltr" aria-level="1">In turn, you might want to verify whether learners already master a phenomenon or typical learner "problem" from a certain level onwards, so that it no longer needs to be focused on.</li> + <li dir="ltr" aria-level="1">You would like to check which phenomena should be taken up again and again across different levels, e.g. by exploring which feature/error occurs at all proficiency levels in the MERLIN corpus.</li> +</ul> +<p dir="ltr"><strong>And here's how:</strong></p> +<ol> + <li dir="ltr" aria-level="1"> Use the <a href="https://merlin-platform.eu/annis/" target="_blank" class="a.reference">ANNIS search</a> and the <strong>↘</strong> <em><strong>Query Builder</strong></em> to obtain information on the frequency of a feature/error for a distinct group of texts (e.g. written productions that received a B1 rating). Proceed as described under <a href="#" onclick="document.forms['glossary'].submit();" class="a.reference"><?php echo $trans['help_search'][$_SESSION['lang']];?></a>.</li> + <li dir="ltr" aria-level="1"> After launching the query (<em><strong>↘ Search</strong></em>) you can see the number of the query results below the search window, e.g. <em>1460 matches in 244 documents</em>.</li> + <li>To get an impression of the frequency of phenomena within a given feature category, e.g. all features related to grammar, use the <strong>↘</strong> <em><strong>Frequency Analysis</strong></em> in ANNIS as described under <a href="#" onclick="document.forms['glossary'].submit();" class="a.reference"><?php echo $trans['help_search'][$_SESSION['lang']];?></a> <strong>↘</strong> <em><strong> 4 Retrieve statistical information</strong></em>.
</li> +</ol> +</div> +<h2 dir="ltr">3 MERLIN for language testing</h2> +<div id="anchor7"></div> +<h3 dir="ltr"><a href="#anchor7" onClick="toggle('#content7','#img7')"><img id="img7" src="img/toggle-expand.png"></a> Using the CEFR systematically in test development </h3> +<div id="content7" class="content"> +<p dir="ltr">Most European language tests are (or claim to be) related to the CEFR. While the Council of Europe provides numerous <a href="http://www.coe.int/t/dg4/linguistic/cadre1_en.asp" target="_blank" class="reference">helpful materials</a>, there is not yet much empirical data (i.e. CEFR-related language samples) to support the test development process, especially for languages other than English (an overview of the available reference level descriptions can be found on <a href="https://www.coe.int/en/web/common-european-framework-reference-languages/reference-level-descriptions-rlds-developed-so-far" target="_blank" class="reference">the web pages of the Council of Europe</a>; for English see <a href="http://www.englishprofile.org/" target="_blank" class="reference">www.englishprofile.org</a>).
</p> +<p dir="ltr">We believe that MERLIN data ...</p> +<ul> + <li dir="ltr" aria-level="1">can help to enhance transparency and quality in CEFR-related test construction for Czech, German, and Italian.</li> + <li dir="ltr" aria-level="1">can be used for benchmarking purposes.</li> + <li dir="ltr" aria-level="1">can foster empirically based development of assessment materials.</li> +</ul> +<p dir="ltr">Furthermore, MERLIN data lends itself to the empirical validation of the CEFR scales (see <a href="C_research.php" target="_blank">MERLIN for research</a>) and might be helpful for empirically based rating scale construction.<br /> +</p> +</div> +<div id="anchor8"></div> +<h3 dir="ltr"><a href="#anchor8" onClick="toggle('#content8','#img8')"><img id="img8" src="img/toggle-expand.png"></a> Adjusting the understanding of CEFR levels among raters</h3> +<div id="content8" class="content"> +<p dir="ltr"><strong>Objective</strong>: Create a common understanding of the CEFR levels in your institution </p> +<p dir="ltr"><strong>Scenarios</strong>: Practise rating procedures of written L2 productions with your colleagues with the help of the MERLIN tasks and texts and compare your ratings with those of the MERLIN team.</p> +<ul> + <li dir="ltr" aria-level="1"> Extract a random sample of written tests on a specific task, e.g. "Neujahrsbrief an einen Freund schreiben" and have your colleague re-rate the texts using the <a href="C_download.php#corpus" target="_blank">MERLIN rating grid</a>.
</li> + <li dir="ltr" aria-level="1"> Discuss your results and compare them to MERLIN ratings.</li> +</ul> +<p dir="ltr"><strong>And here's how</strong>:</p> +<p dir="ltr">How to find learner texts for a specific task.</p> +<ol> + <li dir="ltr" aria-level="1"> Select the appropriate task in <a href="C_mcorpus.php#tasks" target="_blank">the overview</a> and download it.</li> + <li dir="ltr" aria-level="1"> Download the <a href="https://clarin.eurac.edu/repository/xmlui/bitstream/handle/20.500.12124/6/merlin-text-v1.1.zip" target="_blank">MERLIN corpus</a> and choose <strong>↘</strong> <em><strong>meta_ltext_TH</strong></em> (learner texts with target hypothesis).</li> + <li>Search the file manager for texts with this task [<em><strong>↘ Entrance page↘</strong></em> <strong><em>Filter texts with the file manager</em></strong>].</li> +</ol> +</div> +<h2>Links </h2> +<p>Council of Europe (2001). <a href="https://www.coe.int/en/web/common-european-framework-reference-languages" target="_blank" class="reference">Common European Framework of Reference for Languages: Learning, Teaching, Assessment</a>. Council of Europe 2001<br /> + Council of Europe (2018). <a href="https://rm.coe.int/cefr-companion-volume-with-new-descriptors-2018/1680787989" target="_blank" class="reference"> Common European Framework of Reference for Languages: Learning, Teaching, Assessment. Companion volume with new descriptors</a>.
Council of Europe 2018<br /> + <a href="https://www.coe.int/en/web/platform-plurilingual-intercultural-language-education/home" target="_blank" class="reference"> +Platform of resources and references for plurilingual and intercultural education +</a><br /> + <a href="http://www.englishprofile.org/" target="_blank" class="reference">The English profile</a> + <!--INSERT END--> +</p> </div> </div> </div> -</div> \ No newline at end of file +</div> diff --git a/it/start.php b/it/start.php index ace4162..bb25e40 100644 --- a/it/start.php +++ b/it/start.php @@ -11,7 +11,7 @@ <h2>1 Scaricare i testi e le risorse MERLIN</h2> <p>Puoi scaricare il corpus integrale (2.286 testi) nei seguenti formati:</p> <ul> - <li><a href="https://clarin.eurac.edu/repository/xmlui/bitstream/handle/20.500.12124/6/merlin-text-v1.1.zip?sequence=3&isAllowed=y" class="a.reference"> â— come file di testo</a> <a href="https://clarin.eurac.edu/repository/xmlui/bitstream/handle/20.500.12124/6/merlin-text-v1.1.zip?sequence=3&isAllowed=y" dir="ltr"><img src="img/icon_txt.png" alt="txt" width="13" height="16" /></a> inclusi target hypothesis e metadati come età , sesso, prima lingua, nome della traccia e valutazione </li> + <li><a href="https://clarin.eurac.edu/repository/xmlui/bitstream/handle/20.500.12124/6/merlin-text-v1.1.zip?sequence=3&isAllowed=y" class="a.reference">come file di testo</a> <a href="https://clarin.eurac.edu/repository/xmlui/bitstream/handle/20.500.12124/6/merlin-text-v1.1.zip?sequence=3&isAllowed=y" dir="ltr"><img src="img/icon_txt.png" alt="txt" width="13" height="16" /></a> inclusi target hypothesis e metadati come età , sesso, prima lingua, nome della traccia e valutazione </li> <li><a href="https://gitlab.inf.unibz.it/commul/merlin-platform/merlin-exmaralda/tags/v1.1" dir="ltr" class="a.reference">file di trascrizione nel formato EXMARaLDA</a></li> <li>nei formati <a 
href="https://clarin.eurac.edu/repository/xmlui/bitstream/handle/20.500.12124/6/merlin-paula-v1.1.zip?sequence=6&isAllowed=y" class="a.reference">PAULA </a>e <a href="https://clarin.eurac.edu/repository/xmlui/bitstream/handle/20.500.12124/6/merlin-annis-v1.1.zip?sequence=7&isAllowed=y" class="a.reference">ANNIS</a></li> </ul> -- GitLab