<!--
  Indexing transformations for PSML documents

  The source XML follows the structure below:

  ```
    <index-data mediatype="application/vnd.pageseeder.psml+xml">
      <document level="portable"> ... </document>
      <workflow> ... </workflow>
    </index-data>
  ```

  Note that the document uses level `portable` rather than `metadata`
  and therefore includes the content.

  @author Christophe Lauret
  @author Jean-Baptiste Reure

  @version 5.9000
-->
<xsl:transform  xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0"
                xmlns:psf="http://www.pageseeder.com/function"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                exclude-result-prefixes="#all">

<!-- We extend the default index module -->
<xsl:import href="index.xsl" />

<!--
  Documents and Versions

  Generates the index fields for a PSML `document` element: core fields,
  file location, PSML-specific flags, cross-reference statistics, label/role
  facets, publication and version information, the full text content and
  finally the per-element fields produced by the `property`, `inline`,
  `block` and `image` templates.

  The `psf:ps-*` functions and the `$psid-field-name`, `$psid-field-value`
  and `$groupname` variables are not declared in this file; presumably they
  come from the imported `index.xsl` (to be confirmed).

  Note that paths starting with `//` are evaluated from the root of the source
  tree, not from the current `document` element.
-->
<xsl:template match="document" mode="ixml" priority="2">
  <!-- Compute the correct location of the document: everything before the last '/' is the folder, the rest the filename -->
  <xsl:variable name="filefolder" select="replace(documentinfo/uri/@decodedpath, '/[^/]*?$', '')" />
  <xsl:variable name="filename"   select="tokenize(documentinfo/uri/@decodedpath, '/')[last()]" />

  <!-- Core fields -->
  <xsl:sequence select="psf:ps-id($psid-field-name, $psid-field-value)"/>
  <xsl:sequence select="psf:ps-title(documentinfo/uri/displaytitle)"/>
  <!-- A missing @version compares as false and is therefore treated like 'current' -->
  <xsl:sequence select="psf:ps-type(if (@version != 'current') then 'documentversion' else 'document')"/>
  <xsl:sequence select="psf:ps-subtype(if (@version != 'current') then 'documentversion' else 'document')"/>
  <xsl:sequence select="psf:ps-mediatype('application/vnd.pageseeder.psml+xml')"/>
  <xsl:sequence select="psf:ps-modifieddate(@date)"/>
  <xsl:sequence select="psf:psmedia-modifieddate(@date)"/>

  <!-- Document fields (no size for PSML documents!) -->
  <xsl:sequence select="psf:ps-filename($filename)"/>
  <xsl:sequence select="psf:ps-folder($filefolder)"/>
  <xsl:sequence select="psf:ps-docid(documentinfo/uri/@docid)"/>
  <xsl:sequence select="psf:ps-description(documentinfo/uri/description)"/>
  <xsl:sequence select="psf:ps-labels(documentinfo/uri/labels)"/>
  <!-- Owned when the path sits under the group folder; '-' in the group name maps to '/' in the path -->
  <xsl:sequence select="psf:ps-owned(starts-with(documentinfo/uri/@path, concat('/ps/',translate($groupname,'-','/'),'/')))"/>

  <!-- PSML-specific field -->
  <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psdocumenttype" doc-values="sorted"><xsl:value-of select="if (@type) then @type else 'default'"/></field>
  <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="pseditable"     doc-values="sorted"><xsl:value-of select="if (@edit='false') then 'false' else 'true'"/></field>

  <!-- Last edited date: the most recent fragment modification or structure change, only when one exists -->
  <xsl:variable name="lastedited" select="max((fragmentinfo/locator/@modified | fragmentinfo/@structure-modified)/xs:dateTime(.))" />
  <xsl:if test="exists($lastedited)">
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" doc-values="sorted"
           name="pslastediteddate" date-format="yyyyMMddHHmmss" date-resolution="second">
      <xsl:value-of select="psf:format-date-for-lucene($lastedited)" />
    </field>
  </xsl:if>

  <!-- XRef counts (media fragments are ignored; reverse xrefs inside block xrefs are ignored too) -->
  <xsl:variable name="xrefs" select="//xref[not(ancestor::media-fragment)]" />
  <xsl:variable name="blockxrefs" select="//blockxref[not(ancestor::media-fragment)]" />
  <xsl:variable name="reversexrefs" select="//reversexref[not(ancestor::media-fragment or ancestor::blockxref)]" />
  <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psxrefcount" numeric-type="int" doc-values="sorted"><xsl:value-of select="count($xrefs | $blockxrefs)"/></field>
  <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psreversexrefcount" numeric-type="int" doc-values="sorted"><xsl:value-of select="count($reversexrefs)"/></field>
  <!-- XRef labels -->
  <xsl:for-each select="distinct-values(tokenize(string-join($xrefs/@labels|$blockxrefs/@labels, ','), ',')[. != ''])">
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psxreflabel" doc-values="sorted-set"><xsl:value-of select="normalize-space(.)" /></field>
  </xsl:for-each>
  <xsl:for-each select="distinct-values(tokenize(string-join($reversexrefs/@labels, ','), ',')[. != ''])">
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psreversexreflabel" doc-values="sorted-set"><xsl:value-of select="normalize-space(.)" /></field>
  </xsl:for-each>
  <!-- XRef types -->
  <xsl:for-each select="distinct-values($xrefs/@type|$blockxrefs/@type)">
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psxreftype" doc-values="sorted-set"><xsl:value-of select="." /></field>
  </xsl:for-each>
  <xsl:for-each select="distinct-values($reversexrefs/@forwardtype)">
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psreversexreftype" doc-values="sorted-set"><xsl:value-of select="." /></field>
  </xsl:for-each>
  <!-- XRef configs (including empty values) -->
  <xsl:for-each select="distinct-values(for $x in $xrefs|$blockxrefs return if ($x/@config) then $x/@config else '')">
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psxrefconfig" doc-values="sorted-set"><xsl:value-of select="." /></field>
  </xsl:for-each>
  <xsl:for-each select="distinct-values(for $x in $reversexrefs return if ($x/@config) then $x/@config else '')">
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psreversexrefconfig" doc-values="sorted-set"><xsl:value-of select="." /></field>
  </xsl:for-each>
  <!-- Image labels -->
  <xsl:for-each select="distinct-values(tokenize(string-join(//image[not(ancestor::media-fragment)]/@labels, ','), ',')[. != ''])">
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psimagelabel" doc-values="sorted-set"><xsl:value-of select="normalize-space(.)" /></field>
  </xsl:for-each>
  <!-- Link labels -->
  <xsl:for-each select="distinct-values(tokenize(string-join(//link[not(ancestor::media-fragment)]/@labels, ','), ',')[. != ''])">
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="pslinklabel" doc-values="sorted-set"><xsl:value-of select="normalize-space(.)" /></field>
  </xsl:for-each>

  <!-- Fragment labels (labels of the direct children of this document's sections) -->
  <xsl:for-each select="distinct-values(tokenize(string-join(section/*/@labels, ','), ',')[. != ''])">
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psfragmentlabel" doc-values="sorted-set"><xsl:value-of select="normalize-space(.)" /></field>
  </xsl:for-each>

  <!-- Publication info -->
  <xsl:if test="documentinfo/publication">
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="pspublicationroot" doc-values="sorted">true</field>
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psrootpublicationid"><xsl:value-of select="documentinfo/publication/@id" /></field>
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psrootpublicationtype" doc-values="sorted"><xsl:value-of select="documentinfo/publication/@type" /></field>
  </xsl:if>

  <!-- Versions info: the latest version is the one with the highest numeric @id -->
  <xsl:variable name="latestversion" select="documentinfo/versions/version[@id = max(../version/xs:integer(@id))]" />
  <!-- Leave an empty field so documents with no version can be searched -->
  <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="pslatestversion" doc-values="sorted"><xsl:value-of select="$latestversion/@name" /></field>
  <xsl:if test="$latestversion">
    <xsl:sequence select="psf:ps-latestversiondate($latestversion/@created)"/>
  </xsl:if>

  <!-- Created date, use version's if we're indexing one -->
  <xsl:choose>
    <xsl:when test="@version != 'current'">
      <xsl:variable name="version" select="documentinfo/versions/version[@name = current()/@version]"/>
      <xsl:sequence select="psf:ps-createddate($version/@created)"/>
      <xsl:sequence select="psf:psmedia-createddate($version/@created)"/>
      <!-- used by DeleteReleaseQuery.java -->
      <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psversionid" doc-values="sorted"><xsl:value-of select="$version/@id" /></field>
    </xsl:when>
    <xsl:when test="documentinfo/uri/@created">
      <xsl:sequence select="psf:ps-createddate(documentinfo/uri/@created)"/>
      <xsl:sequence select="psf:psmedia-createddate(documentinfo/uri/@created)"/>
    </xsl:when>
  </xsl:choose>

  <!-- Block label names -->
  <xsl:for-each select="distinct-values(tokenize(string-join(//block[not(ancestor::media-fragment)][@label]/@label, ','), ',')[. != ''])">
    <xsl:sort select="." />
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psblocklabelname" doc-values="sorted-set"><xsl:value-of select="normalize-space(.)" /></field>
  </xsl:for-each>

  <!-- Inline label names -->
  <xsl:for-each select="distinct-values(tokenize(string-join(//inline[not(ancestor::media-fragment)][@label]/@label, ','), ',')[. != ''])">
    <xsl:sort select="." />
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psinlinelabelname" doc-values="sorted-set"><xsl:value-of select="normalize-space(.)" /></field>
  </xsl:for-each>

  <!-- Property names (metadata properties are reported separately below) -->
  <xsl:for-each select="distinct-values(tokenize(string-join(//property[not(ancestor::media-fragment or ancestor::metadata)][@name]/@name, ','), ',')[. != ''])">
    <xsl:sort select="." />
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="pspropertyname" doc-values="sorted-set"><xsl:value-of select="normalize-space(.)" /></field>
  </xsl:for-each>

  <!-- Read only inline labels: every section when the document itself is read only, otherwise only read only sections -->
  <xsl:for-each select="distinct-values(tokenize(string-join((.[@edit='false']/section | //section[@edit='false'])//inline[not(ancestor::media-fragment)][@label]/@label, ','), ',')[. != ''])">
    <xsl:sort select="." />
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psreadonlyinline" doc-values="sorted-set"><xsl:value-of select="normalize-space(.)" /></field>
  </xsl:for-each>

  <!-- Read only property names -->
  <xsl:for-each select="distinct-values(tokenize(string-join((.[@edit='false']/section | //section[@edit='false'])//property[not(ancestor::media-fragment)][@name]/@name, ','), ',')[. != ''])">
    <xsl:sort select="." />
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psreadonlyproperty" doc-values="sorted-set"><xsl:value-of select="normalize-space(.)" /></field>
  </xsl:for-each>

  <!--
    Metadata property names: only properties that have a `metadata` ancestor,
    and not the metadata of transcluded documents (inside a block xref). This
    mirrors the rule the `property` template uses to decide what is metadata.
  -->
  <xsl:for-each select="distinct-values(tokenize(string-join(//property[ancestor::metadata][not(ancestor::media-fragment or ancestor::blockxref)][@name]/@name, ','), ',')[. != ''])">
    <xsl:sort select="." />
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psmetadataname" doc-values="sorted-set"><xsl:value-of select="normalize-space(.)" /></field>
  </xsl:for-each>

  <!-- Placeholders -->
  <xsl:for-each select="distinct-values(tokenize(string-join(//placeholder[not(ancestor::media-fragment)][@name]/@name, ','), ',')[. != ''])">
    <xsl:sort select="." />
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psplaceholder" doc-values="sorted-set"><xsl:value-of select="normalize-space(.)" /></field>
  </xsl:for-each>

  <!-- Fragment types -->
  <xsl:for-each select="distinct-values(tokenize(string-join(//section/*[@type]/@type, ','), ',')[. != ''])">
    <xsl:sort select="." />
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psfragmenttype" doc-values="sorted-set"><xsl:value-of select="normalize-space(.)" /></field>
  </xsl:for-each>

  <!-- List roles (including empty value) -->
  <xsl:for-each select="distinct-values(for $x in (//list|//nlist)[not(ancestor::media-fragment)] return if ($x/@role) then $x/@role else '')">
    <xsl:sort select="." />
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="pslistrole" doc-values="sorted-set"><xsl:value-of select="." /></field>
  </xsl:for-each>

  <!-- Table roles (including empty value) -->
  <xsl:for-each select="distinct-values(for $x in //table[not(ancestor::media-fragment)] return if ($x/@role) then $x/@role else '')">
    <xsl:sort select="." />
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="pstablerole" doc-values="sorted-set"><xsl:value-of select="." /></field>
  </xsl:for-each>

  <!-- Preformat roles (including empty value) -->
  <xsl:for-each select="distinct-values(for $x in //preformat[not(ancestor::media-fragment)] return if ($x/@role) then $x/@role else '')">
    <xsl:sort select="." />
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="pspreformatrole" doc-values="sorted-set"><xsl:value-of select="." /></field>
  </xsl:for-each>

  <!-- Content: the text produced for the sections, joined and whitespace-normalized into one string -->
  <xsl:variable name="fullcontent" as="xs:string*">
    <xsl:apply-templates select="section" mode="ixml"/>
  </xsl:variable>
  <xsl:sequence select="psf:ps-content(normalize-space(string-join($fullcontent, ' ')))"/>

  <!-- Prefix content (all headings, and paragraphs that carry a prefix) -->
  <xsl:for-each select="(//heading | //para[@prefix])[not(ancestor::media-fragment)]">
    <xsl:sequence select="psf:ps-prefixcontent(@prefix, normalize-space(.))"/>
  </xsl:for-each>

  <!-- Reference special content -->
  <xsl:apply-templates select="descendant::property[not(ancestor::media-fragment)]" mode="ixml"/>
  <xsl:apply-templates select="descendant::inline[not(ancestor::media-fragment)]"   mode="ixml"/>
  <xsl:apply-templates select="descendant::block[not(ancestor::media-fragment)]"    mode="ixml"/>
  <xsl:apply-templates select="descendant::image[not(ancestor::media-fragment)]"    mode="ixml"/>

</xsl:template>

<!-- TEMPLATES FOR FRAGMENT CONTENT =========================================================== -->

<!--
  Standard fragment: outputs the text of every descendant text node as a
  single whitespace-normalized string.

  Text located under a `documentinfo` or `locator` element is left out.
-->
<xsl:template match="fragment" mode="ixml">
  <xsl:variable name="indexable" select="descendant::text()[empty(ancestor::documentinfo | ancestor::locator)]"/>
  <xsl:value-of select="normalize-space(string-join($indexable, ' '))"/>
</xsl:template>

<!--
  XRef fragment: joins all descendant text into one whitespace-normalized
  string, ignoring any text that sits inside a `documentinfo` or a `locator`.
-->
<xsl:template match="xref-fragment" mode="ixml">
  <xsl:variable name="joined"
                select="string-join(descendant::text()[not(ancestor::documentinfo)][not(ancestor::locator)], ' ')"/>
  <xsl:value-of select="normalize-space($joined)"/>
</xsl:template>

<!--
  Properties fragment: for each property, outputs its title (or its name when
  it has no title), a space, then its values followed by a trailing space.

  The values are, in this order: the `@value` attribute and the text of child
  elements other than `markdown` and `xref` (space separated), the text of
  `xref` children outside `documentinfo` and `locator` (space separated), and
  the result of `psf:clean-markdown` on the `markdown` child.
-->
<xsl:template match="properties-fragment" mode="ixml">
  <xsl:value-of separator=" ">
    <xsl:for-each select="property">
      <xsl:variable name="caption" select="(@title, @name)[1]"/>
      <xsl:variable name="plain"   select="@value | *[not(self::markdown) and not(self::xref)]//text()"/>
      <xsl:variable name="linked"  select="xref//text()[not(ancestor::documentinfo or ancestor::locator)]"/>
      <!-- Title or name -->
      <xsl:value-of select="$caption"/>
      <xsl:text> </xsl:text>
      <!-- Values -->
      <xsl:value-of select="string-join($plain, ' ')"/>
      <xsl:value-of select="string-join($linked, ' ')"/>
      <xsl:value-of select="psf:clean-markdown(markdown)"/>
      <xsl:text> </xsl:text>
    </xsl:for-each>
  </xsl:value-of>
</xsl:template>

<!--
  Media fragment: outputs every descendant text node, without exclusions, as
  one whitespace-normalized string.
-->
<xsl:template match="media-fragment" mode="ixml">
  <!-- TODO: process different depending on media type? -->
  <xsl:variable name="everything" select=".//text()"/>
  <xsl:value-of select="normalize-space(string-join($everything, ' '))"/>
</xsl:template>


<!-- TEMPLATES FOR INDIVIDUAL FIELDS ========================================================== -->

<!--
  Images

  Emits `psimagesrc` with the image source unless the source is a `data:` URI,
  and `psimagealt` (tokenized) when the alternative text is not empty.
-->
<xsl:template match="image" mode="ixml">
  <xsl:variable name="embedded" select="starts-with(@src, 'data:')"/>
  <xsl:variable name="alternative" select="@alt[. != '']"/>
  <!-- Source, skipped for inline (embedded) image data -->
  <xsl:if test="not($embedded)">
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psimagesrc" doc-values="none"><xsl:value-of select="@src" /></field>
  </xsl:if>
  <!-- Alternative text, at most one attribute so at most one field -->
  <xsl:for-each select="$alternative">
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" name="psimagealt" tokenize="true"><xsl:value-of select="."/></field>
  </xsl:for-each>
</xsl:template>

<!--
  Inline labels

  Emits one field per inline label that has visible content. The field is
  named `psinline-` followed by the label, where every character other than
  a word character or a hyphen is replaced by an underscore.

  The label suffix selects how the value is indexed:
   - `-date`: indexed as a day-resolution date when the content is an ISO
              date (2002-03-27) or a written date (27th March 2002, or
              March 2002 which defaults to the first of the month);
              otherwise indexed as a plain untokenized value.
   - `-text`: indexed as tokenized text.
   - other:   indexed as a whitespace-normalized untokenized value.
-->
<xsl:template match="inline" mode="ixml">
  <!--
    Skip inline labels containing only whitespace or non-breaking spaces.
    The class must not contain a second caret: inside a character class it
    is a literal, which made content consisting only of carets look blank.
  -->
  <xsl:if test="matches(., '[^\s&#160;]')">
    <xsl:variable name="fieldname" select="concat('psinline-',replace(@label, '[^-\w]', '_'))" />
    <xsl:choose>
      <!-- We try to parse date inline as dates -->
      <xsl:when test="ends-with(@label, '-date')">
        <!-- Rebuild a year-month-day string from: optional day, optional ordinal suffix, month name, 4 digit year -->
        <xsl:variable name="date">
          <xsl:analyze-string select="." regex="(\d{{0,2}})(st|nd|rd|th)?\W*(\w+)\W*(\d{{4}})">
            <xsl:matching-substring>
              <xsl:value-of select="regex-group(4), psf:month-name-to-number(regex-group(3)), format-number(number(regex-group(1)), '00')" separator="-"/>
            </xsl:matching-substring>
          </xsl:analyze-string>
        </xsl:variable>
        <!-- A missing day is formatted as 'NaN': default to the first day of the month -->
        <xsl:variable name="full-date" select="if (ends-with($date, 'NaN')) then replace($date, 'NaN$', '01') else $date" />
        <xsl:choose>
          <!-- check ISO format e.g. 2002-03-27 -->
          <xsl:when test="normalize-space(.) castable as xs:date">
            <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false"
                   name="{$fieldname}" date-format="yyyy-MM-dd" date-resolution="day" doc-values="sorted-set">
              <xsl:value-of select="normalize-space(.)" />
            </field>
          </xsl:when>
          <!-- check written format e.g. 27th March 2002 -->
          <xsl:when test="$full-date castable as xs:date">
            <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false"
                   name="{$fieldname}" date-format="yyyy-MM-dd" date-resolution="day" doc-values="sorted-set">
              <xsl:value-of select="$full-date" />
            </field>
          </xsl:when>
          <!-- not a recognizable date: index the text as is -->
          <xsl:otherwise>
            <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="{$fieldname}"
                   doc-values="sorted-set"><xsl:value-of select="normalize-space(string-join(.//text(), ' '))" /></field>
          </xsl:otherwise>
        </xsl:choose>
      </xsl:when>
      <!-- Analyze fields ending in '-text' -->
      <xsl:when test="ends-with(@label, '-text')">
        <field store="true"  index="docs-and-freqs-and-positions-and-offsets" tokenize="true" name="{$fieldname}">
          <xsl:value-of select="string-join(.//text(), ' ')"/>
        </field>
      </xsl:when>
      <xsl:otherwise>
        <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="{$fieldname}"
               doc-values="sorted-set"><xsl:value-of select="normalize-space(string-join(.//text(), ' '))" /></field>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:if>
</xsl:template>

<!--
  Convert an English month name to its two-digit number.

  The name is matched in full against the twelve month names after being
  lower-cased, e.g. `March` gives `03`.

  @param name the full month name

  @return the position of the month in the year formatted with the `00` picture
-->
<xsl:function name="psf:month-name-to-number" as="xs:string">
  <xsl:param name="name" as="xs:string"/>
  <xsl:variable name="wanted" as="xs:string" select="lower-case($name)"/>
  <xsl:variable name="position" select="index-of(
      ('january', 'february', 'march', 'april', 'may', 'june',
       'july', 'august', 'september', 'october', 'november', 'december'), $wanted)"/>
  <xsl:sequence select="format-number($position, '00')"/>
</xsl:function>


<!--
  Block labels

  Emits a tokenized, non-stored field for each block label that has visible
  content. The field is named `psblock-` followed by the label, where every
  character other than a word character or a hyphen is replaced by an
  underscore. The value is the text of all descendants joined with spaces.
-->
<xsl:template match="block" mode="ixml">
  <!--
    Skip block labels containing only whitespace or non-breaking spaces.
    The class must not contain a second caret: inside a character class it
    is a literal, which made content consisting only of carets look blank.
  -->
  <xsl:if test="matches(., '[^\s&#160;]')">
    <field store="false" index="docs-and-freqs-and-positions-and-offsets" tokenize="true" name="psblock-{replace(@label, '[^-\w]', '_')}">
      <xsl:value-of select="string-join(descendant::text(), ' ')" />
    </field>
  </xsl:if>
</xsl:template>

<!--
  Properties

  A property outside any `metadata` element is passed to `psf:ps-property`.
  A property inside `metadata` is passed to `psf:ps-metadata`, unless it is
  transcluded (inside a block xref), in which case nothing is produced.
-->
<xsl:template match="property" mode="ixml">
  <xsl:variable name="in-metadata" select="exists(ancestor::metadata)"/>
  <xsl:variable name="transcluded" select="exists(ancestor::blockxref)"/>
  <xsl:choose>
    <xsl:when test="not($in-metadata)">
      <xsl:sequence select="psf:ps-property(.)"/>
    </xsl:when>
    <!-- don't include transcluded metadata -->
    <xsl:when test="not($transcluded)">
      <xsl:sequence select="psf:ps-metadata(.)"/>
    </xsl:when>
  </xsl:choose>
</xsl:template>

</xsl:transform>