<?xml version="1.0" encoding="UTF-8"?>
<?oxygen RNGSchema="tei_odds.rng" type="xml"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0" xmlns:rng="http://relaxng.org/ns/structure/1.0" xml:lang="en">
 <teiHeader>
  <fileDesc>
   <titleStmt>
    <title>Customization of TEI schema for NKJP (text level), version 0.6.4</title>
    <author>Piotr Bański and Adam Przepiórkowski</author>
   </titleStmt>
   <publicationStmt>
    <availability status="restricted">
     <p>This template file is not freely available until it gets released in the (pre)final form.</p>
    </availability>
   </publicationStmt>
   <sourceDesc>
    <p>Created from scratch, based loosely on Sebastian Rahtz's "TEI for Corpora"</p>
   </sourceDesc>
  </fileDesc>
 </teiHeader>
 <text>
  <body>
   <head> NKJP Corpus -- schema for the text layer</head>
   <p>This is the schema for the text layer of the NKJP corpus (http://www.nkjp.pl/).</p>
   <list type="glossed" n="changelog">
    <item><label>Version 0.1</label>Base: "TEI for Corpora". Removed
     <gi>typeNote</gi> from the header.</item>
    <item>
     <label>Version 0.2</label>
     <list type="bulleted">
      <item>Removed the "corpus" and "namesdates" modules.</item>
      <item>Removed numerous elements from the included modules, to
       leave as few options as possible. Some elements were retained
       for the sake of the header.</item>
      <item>Defined new content for
       <gi>body</gi>, <gi>div</gi>, and <gi>ab</gi>, in terms of direct
       references. <gi>body</gi> can now contain only a mixture of
       (<gi>div</gi> and <gi>gap</gi>) OR (<gi>ab</gi> and
       <gi>gap</gi>). <gi>div</gi> can only contain a mixture of
       <gi>ab</gi> and <gi>gap</gi>. <gi>ab</gi> can only contain text,
       <gi>gap</gi> or <gi>hi</gi> (which is present in legacy
       texts).</item>
      <item>Restricted the root to <gi>teiCorpus</gi>.</item>
     </list>
    </item>
    <item>
     <label>Version 0.3, 22 May 2009</label>
     <list type="bulleted">
      <item>Modified the header: <gi>category</gi> can now contain a
       sequence of <gi>catDesc</gi>, rather than just a single
       instance of it.</item>
      <item>Added modules "corpus" and "namesdates", for
       <gi>particDesc</gi> and <gi>listPerson</gi>.</item>
     </list>
    </item>
    <item>
     <label>Version 0.4, 04 June 2009</label>
     <list type="bulleted">
      <item>AP: Added <gi>nkjp:topic</gi> to the header.</item>
      <item>AP: Added the <att>nkjp:subcorpus</att> attribute.</item>
      <item>AP: Added the "spoken" module for <gi>recordingStmt</gi>.</item>
     </list>
    </item>
    <item>
     <label>Version 0.5, 08 July 2009</label>
     <list type="bulleted">
      <item>AP: Changed the modality ("usage") of the attribute <att>nkjp:file</att> (actually added earlier) in the header element <gi>extent</gi> to "req".</item>
      <item>AP: Reintroduced <gi>u</gi> within <gi>body</gi>, for spoken data.</item>
     </list>
    </item>
    <item>
     <label>Version 0.6, 21 July 2009</label>
     <list type="bulleted">
      <item>AP: Reintroduced sequence of <gi>p</gi> within <gi>div</gi> and within <gi>body</gi>.</item>
     </list>
    </item>
    <item>
     <label>Version 0.6.1, 28 July 2009</label>
     <list type="bulleted">
      <item>AP: General clean-up, changing order of element specifications so that ediff with other levels is easier.</item>
     </list>
    </item>
    <item>
     <label>Version 0.6.2, 23 August 2009</label>
     <list type="bulleted">
      <item>AP: Removed (commented out) the modification of <gi>category</gi>; <gi>desc</gi> will be used instead of <gi>catDesc</gi> for the bilingual description of categories.</item>
      <item>AP: Added module tagdocs, so that <gi>gi</gi>, <gi>att</gi>, etc., may be used in headers.</item>
      <item>AP: Added <att>xml:lang</att> to <gi>nkjp:topic</gi>.</item>
     </list>
    </item>
    <item>
     <label>Version 0.6.3, 25 September 2009</label>
     <list type="bulleted">
      <item>AP: Modified the repertoire of possible values of <att>nkjp:subcorpus</att>.</item>
     </list>
    </item>
    <item>
     <label>Version 0.6.4, 5 May 2011</label>
     <list type="bulleted">
      <item>AP: Introduced <gi>nkjp:fsLib</gi> which may contain <gi>fLib</gi> and <gi>fvLib</gi>. Solution adapted from tei_jos.xml (Tomaž Erjavec's "JOS 100k A" corpus), earlier used in NKJP_structure.xml.</item>
      <item>AP: In connection with the above, introduced the iso-fs module.</item>
     </list>
    </item>
   </list>
   <schemaSpec ident="NKJP_text" start="teiCorpus" prefix="tei_">
    <!-- Required modules. Reasons recorded in the changelog above:
         "corpus" and "namesdates" were added for particDesc and listPerson,
         "spoken" for recordingStmt, and "tagdocs" so that gi, att, etc. may
         be used in headers. -->
    <moduleRef key="header"/>
    <moduleRef key="core"/>
    <moduleRef key="corpus"/>
    <moduleRef key="spoken"/>
    <moduleRef key="namesdates"/>
    <moduleRef key="tei"/>
    <moduleRef key="textstructure"/>
    <moduleRef key="tagdocs"/>
    <!-- The two header deletions below are required to avoid a Sanity Checker complaint. -->
    <elementSpec ident="handNote" mode="delete" module="header"/>
    <elementSpec ident="typeNote" mode="delete" module="header"/>
    <!-- end of Sanity-Checker-related mods -->
    <!-- Optional modules. "iso-fs" was introduced in connection with
         nkjp:fsLib (see changelog, version 0.6.4), whose content model
         references fLib and fvLib. -->
    <moduleRef key="iso-fs"/>
    <moduleRef key="linking"/>
    <!-- "linking" is pulled in because it contains <ab>, whose content model
         is changed further down in this schemaSpec. -->
    <!-- Elements of the "linking" module removed from the schema. <ab> is
         not in this list; it is kept and redefined in the "linking" changes
         section of this schemaSpec. -->
    <elementSpec module="linking" ident="alt" mode="delete"/>
    <elementSpec module="linking" ident="altGrp" mode="delete"/>
    <elementSpec module="linking" ident="anchor" mode="delete"/>
    <elementSpec module="linking" ident="join" mode="delete"/>
    <elementSpec module="linking" ident="joinGrp" mode="delete"/>
    <elementSpec module="linking" ident="link" mode="delete"/>
    <elementSpec module="linking" ident="linkGrp" mode="delete"/>
    <elementSpec module="linking" ident="seg" mode="delete"/>
    <elementSpec module="linking" ident="timeline" mode="delete"/>
    <elementSpec module="linking" ident="when" mode="delete"/>
    <!-- Elements of the "textstructure" module removed from the schema.
         <body> and <div> are not in this list; both are kept and given new
         content models in the "textstructure" changes section. -->
    <elementSpec module="textstructure" ident="argument" mode="delete"/>
    <elementSpec module="textstructure" ident="back" mode="delete"/>
    <elementSpec module="textstructure" ident="byline" mode="delete"/>
    <elementSpec module="textstructure" ident="closer" mode="delete"/>
    <elementSpec module="textstructure" ident="dateline" mode="delete"/>
    <elementSpec module="textstructure" ident="div1" mode="delete"/>
    <elementSpec module="textstructure" ident="div2" mode="delete"/>
    <elementSpec module="textstructure" ident="div3" mode="delete"/>
    <elementSpec module="textstructure" ident="div4" mode="delete"/>
    <elementSpec module="textstructure" ident="div5" mode="delete"/>
    <elementSpec module="textstructure" ident="div6" mode="delete"/>
    <elementSpec module="textstructure" ident="div7" mode="delete"/>
    <elementSpec module="textstructure" ident="docAuthor" mode="delete"/>
    <elementSpec module="textstructure" ident="docDate" mode="delete"/>
    <elementSpec module="textstructure" ident="docEdition" mode="delete"/>
    <elementSpec module="textstructure" ident="docImprint" mode="delete"/>
    <elementSpec module="textstructure" ident="docTitle" mode="delete"/>
    <elementSpec module="textstructure" ident="epigraph" mode="delete"/>
    <elementSpec module="textstructure" ident="floatingText" mode="delete"/>
    <elementSpec module="textstructure" ident="front" mode="delete"/>
    <elementSpec module="textstructure" ident="group" mode="delete"/>
    <elementSpec module="textstructure" ident="imprimatur" mode="delete"/>
    <elementSpec module="textstructure" ident="opener" mode="delete"/>
    <elementSpec module="textstructure" ident="postscript" mode="delete"/>
    <elementSpec module="textstructure" ident="salute" mode="delete"/>
    <elementSpec module="textstructure" ident="signed" mode="delete"/>
    <elementSpec module="textstructure" ident="titlePage" mode="delete"/>
    <elementSpec module="textstructure" ident="titlePart" mode="delete"/>
    <elementSpec module="textstructure" ident="trailer" mode="delete"/>
    <!-- Elements of the "core" module removed from the schema. Note that
         gap, hi and p, which the body, div and ab content models in this
         schemaSpec refer to, are not in this list. -->
    <elementSpec module="core" ident="add" mode="delete"/>
    <elementSpec module="core" ident="binaryObject" mode="delete"/>
    <elementSpec module="core" ident="choice" mode="delete"/>
    <elementSpec module="core" ident="corr" mode="delete"/>
    <elementSpec module="core" ident="del" mode="delete"/>
    <elementSpec module="core" ident="divGen" mode="delete"/>
    <elementSpec module="core" ident="gloss" mode="delete"/>
    <elementSpec module="core" ident="graphic" mode="delete"/>
    <elementSpec module="core" ident="l" mode="delete"/>
    <elementSpec module="core" ident="lb" mode="delete"/>
    <elementSpec module="core" ident="lg" mode="delete"/>
    <elementSpec module="core" ident="measure" mode="delete"/>
    <elementSpec module="core" ident="measureGrp" mode="delete"/>
    <elementSpec module="core" ident="milestone" mode="delete"/>
    <elementSpec module="core" ident="pb" mode="delete"/>
    <elementSpec module="core" ident="reg" mode="delete"/>
    <elementSpec module="core" ident="said" mode="delete"/>
    <elementSpec module="core" ident="sic" mode="delete"/>
    <elementSpec module="core" ident="soCalled" mode="delete"/>
    <elementSpec module="core" ident="sp" mode="delete"/>
    <elementSpec module="core" ident="speaker" mode="delete"/>
    <elementSpec module="core" ident="stage" mode="delete"/>
    <elementSpec module="core" ident="unclear" mode="delete"/>

    <!-- Changes to the module "textstructure". -->

    <!-- body admits exactly one of four alternative patterns, each requiring
         at least one child: (1) u only; (2) any mix of div and gap;
         (3) any mix of ab and gap; (4) any mix of p and gap.
         Children from different alternatives cannot be combined. -->
    <elementSpec ident="body" module="textstructure" mode="change">
     <desc>Can contain either a mixture of <gi>div</gi> and
      <gi>gap</gi> elements (typical for collections of samples), or
      a sequence of <gi>ab</gi> and <gi>gap</gi> elements (for
      single texts). AP: this is a _clean_ modification.  AP (090708):
      reintroduced <gi>u</gi>, for spoken data.  AP (090721):
      reintroduced sequence of <gi>p</gi>.</desc>
     <content>
      <rng:choice>
       <!-- (1) spoken data: utterances only -->
       <rng:oneOrMore>
        <rng:ref name="u"/>
       </rng:oneOrMore>
       <!-- (2) collections of samples: div and gap -->
       <rng:oneOrMore>
        <rng:choice>
         <rng:ref name="div"/>
         <rng:ref name="gap"/>
        </rng:choice>
       </rng:oneOrMore>
       <!-- (3) single texts: ab and gap -->
       <rng:oneOrMore>
        <rng:choice>
         <rng:ref name="ab"/>
         <rng:ref name="gap"/>
        </rng:choice>
       </rng:oneOrMore>
       <!-- (4) paragraphs: p and gap -->
       <rng:oneOrMore>
        <rng:choice>
         <rng:ref name="p"/>
         <rng:ref name="gap"/>
        </rng:choice>
       </rng:oneOrMore>
      </rng:choice>
     </content>
    </elementSpec>

    <!-- div admits one of two alternative patterns, each requiring at least
         one child: any mix of ab and gap, or any mix of p and gap.
         ab and p cannot be combined within the same div. -->
    <elementSpec ident="div" module="textstructure" mode="change">
     <desc>Used for collections of texts. Can contain a mixture of
      <gi>ab</gi> and <gi>gap</gi> elements. Use the <att>decls</att>
      attribute to refer to the appropriate place in the header
      (typically an <gi>item</gi> element) that bears the metadata for
      the given sample. The <att>n</att> attribute can be used to hold
      the name or the number of the particular <gi>div</gi>, if
      necessary. AP: this is a _clean_ modification.  AP (090721):
      reintroduced sequence of <gi>p</gi>.</desc>
     <content>
      <rng:choice>
       <!-- ab and gap -->
       <rng:oneOrMore>
        <rng:choice>
         <rng:ref name="ab"/>
         <rng:ref name="gap"/>
        </rng:choice>
       </rng:oneOrMore>
       <!-- p and gap -->
       <rng:oneOrMore>
        <rng:choice>
         <rng:ref name="p"/>
         <rng:ref name="gap"/>
        </rng:choice>
       </rng:oneOrMore>
      </rng:choice>
     </content>
    </elementSpec>

    <!-- Changes to the module "linking". -->

    <!-- ab is restricted to mixed content: character data interleaved with
         hi and gap elements only. -->
    <elementSpec ident="ab" module="linking" mode="change">
     <desc>The sequence between two newlines (or between sequences of
      newline+empty line, depending on the format of the input); these
      blocks become identified as headings, paragraphs, etc., in the
      structural annotation layer (ann_structure.xml). This element
      can only contain plain text with optional <gi>gap</gi> or
      <gi>hi</gi> elements (which are sometimes present in legacy
      texts). AP: this is a _clean_ modification.</desc>
     <content>
      <rng:oneOrMore>
       <rng:choice>
        <rng:text/>
        <rng:ref name="hi"/>
        <rng:ref name="gap"/>
       </rng:choice>
      </rng:oneOrMore>
     </content>
    </elementSpec>

    <!-- Only changes affecting headers below. -->

<!--     <elementSpec ident="category" module="header" mode="change"> -->
<!--      <desc>Contains one or more <gi>catDesc</gi> elements (one for -->
<!--       each relevant language); may be recursive. AP: this is an -->
<!--       _unclean_ modification.</desc> -->
<!--      <content> -->
<!--       <rng:oneOrMore> -->
<!--        <rng:ref name="catDesc"/> -->
<!--       </rng:oneOrMore> -->
<!--       <rng:zeroOrMore> -->
<!--        <rng:ref name="category"/> -->
<!--       </rng:zeroOrMore> -->
<!--      </content> -->
<!--     </elementSpec> -->

    <!-- Adds the optional nkjp:subcorpus attribute to publicationStmt.
         Its value is drawn from a closed list of four identifiers and
         defaults to "restricted". -->
    <elementSpec ident="publicationStmt" module="header" mode="change">
     <attList>
      <attDef ident="subcorpus" mode="add" ns="http://www.nkjp.pl/ns/1.0" usage="opt">
       <desc>Possible values are: "balanced" (for text in the 300-million-word balanced subcorpus), "unbalanced" (for other texts which may be distributed by NKJP), "restricted" (for texts available only for the internal NKJP purposes, not to be distributed), "one_million" (for texts in the 1-million-word manually annotated sample).</desc>
       <!-- value assumed when the attribute is omitted -->
       <defaultVal>restricted</defaultVal>
       <!-- closed list: no other values are permitted -->
       <valList type="closed">
        <valItem ident="balanced"/>
        <valItem ident="unbalanced"/>
        <valItem ident="restricted"/>
        <valItem ident="one_million"/>
       </valList>
      </attDef>
     </attList>
    </elementSpec>

    <!-- Adds the mandatory nkjp:file attribute to extent. Because the
         attribute is required (usage="req") it must always be written
         explicitly, so no defaultVal is declared: a default could never take
         effect and contradicts the "req" usage. -->
    <elementSpec ident="extent" module="header" mode="change">
     <attList>
      <attDef ident="file" mode="add" ns="http://www.nkjp.pl/ns/1.0" usage="req">
       <desc>Which file (level) does the extent refer to? (Possible
        values are text.xml, ann_segmentation.xml, etc.)</desc>
      </attDef>
     </attList>
    </elementSpec>

    <!-- New element nkjp:fsLib (NKJP namespace), a member of
         model.encodingDescPart. It must contain at least one child, each
         being either fLib or fvLib, in any order. -->
    <elementSpec ident="fsLib" ns="http://www.nkjp.pl/ns/1.0" mode="add">
     <desc>Contains feature and feature-value libraries.</desc>
     <classes><memberOf key="model.encodingDescPart"/></classes>
     <content>
      <!-- one or more libraries, freely interleaved -->
      <rng:oneOrMore>
       <rng:choice>
        <rng:ref name="fLib"/>
        <rng:ref name="fvLib"/>
       </rng:choice>
      </rng:oneOrMore>
     </content>
    </elementSpec>

    <!-- New element nkjp:topic (NKJP namespace), a member of
         model.profileDescPart. It holds plain text only and carries an
         optional xml:lang attribute. -->
    <elementSpec ident="topic" ns="http://www.nkjp.pl/ns/1.0" mode="add">
     <desc>The topic of spoken conversation. It might seem that
      textDesc/domain could be used for this purpose, but, alas!,
      inclusion of textDesc makes it necessary to use all of its 8
      subelements (channel, constitution, derivation, etc.), while we
      only need something like domain...</desc>
     <classes>
      <memberOf key="model.profileDescPart"/>
     </classes>
     <content>
      <rng:text/>
     </content>
     <attList>
      <!-- The attribute is declared in the XML namespace, i.e. it is xml:lang. -->
      <attDef ident="lang" mode="add" ns="http://www.w3.org/XML/1998/namespace" usage="opt">
       <desc>The usual xml:lang attribute.</desc>
       <!-- NOTE(review): the datatype is data.name rather than a
            language-specific datatype; left unchanged so that validation
            is not affected. Confirm this is intended. -->
       <datatype minOccurs="1" maxOccurs="1">
        <rng:ref name="data.name"/>
       </datatype>
      </attDef>
     </attList>
    </elementSpec>

   </schemaSpec>
  </body>
 </text>
</TEI>

<!-- Local Variables: -->
<!-- eval: (auto-fill-mode 0) -->
<!-- End: -->

