<!--
  This XSLT module provides default templates for indexing.

  Custom index files should generally import this module and extend
  templates as necessary.

  The source XML follows the structure below:

  ```
    <index-data mediatype="[mediatype]">
      <document level="metadata"> ... </document>
      <workflow> ... </workflow>
      <content source="tika"> ... </content>
    </index-data>
  ```

  @author Christophe Lauret
  @version 5.9000
-->
<xsl:transform  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  version="2.0" xmlns:ps="http://www.pageseeder.com/editing/2.0"
                xmlns:psf="http://www.pageseeder.com/function"
	              xmlns:xs="http://www.w3.org/2001/XMLSchema"
                xmlns:xhtml="http://www.w3.org/1999/xhtml"
                exclude-result-prefixes="#all">

<!-- Common fields are defined here using XSLT functions -->
<xsl:import href="common.xsl" />

<!-- Results are serialized as unindented UTF-8 XML declaring the Flint Index Documents 5.0 DTD -->
<xsl:output method="xml" indent="no" encoding="utf-8" doctype-public="-//Weborganic//DTD::Flint Index Documents 5.0//EN"
            doctype-system="http://weborganic.org/schema/flint/index-documents-5.0.dtd"/>

<!-- Sent by the index processor -->
<!-- Name (default `psid`) and value of the ID field; both are handed to `psf:ps-id` for each indexed document -->
<xsl:param name="psid-field-name" select="'psid'" />
<xsl:param name="psid-field-value" />
<!-- Group name; its hyphens are mapped to `/` to build the `/ps/.../` path prefix tested for the owned field -->
<xsl:param name="groupname" />

<!-- indexXFields group property -->
<!-- Tika `meta` entries are only turned into fields by `psf:x-field` when this equals the string 'true' -->
<xsl:param name="ps-indexXFields" select="'false'" />

<!-- TEMPLATES ============================================================ -->

<!--
  Default template for document `index-data`.

  Expected structure:
  ```
    <index-data mediatype="[mediatype]">
      <document level="metadata"> ... </document>
      <workflow > ... </workflow>
      <content source="tika"> ...  </content>
    </index-data>
  ```
-->
<xsl:template match="index-data[document]">
  <!-- Every field generated below ends up in one single index document -->
  <documents version="5.0">
    <document>
      <!-- Standard fields: one pass per kind of child, each handled in `ixml` mode -->
      <xsl:apply-templates mode="ixml" select="document"/>
      <xsl:apply-templates mode="ixml" select="workflow"/>
      <xsl:apply-templates mode="ixml" select="publications/publication"/>
      <xsl:apply-templates mode="ixml" select="content"/>
      <xsl:apply-templates mode="ixml" select="extra"/>
      <!-- Extension point: the `custom-fields` named template runs with `index-data` as context -->
      <xsl:call-template name="custom-fields"/>
    </document>
  </documents>
</xsl:template>

<!--
  Default fields generated for a `<document>`.
-->
<xsl:template match="document" mode="ixml">
  <!--
    Emits the index fields for one `document` element:
     - core and file fields read from `documentinfo/uri`
     - version fields read from `documentinfo/versions`
     - cross-reference, image and link label fields
     - one field per metadata property

    Uses the global parameters `$psid-field-name`, `$psid-field-value` and `$groupname`.
    The `psf:*` functions are not defined in this module (presumably provided by the
    imported `common.xsl`).
  -->

  <!-- We pre-compute some values to make reading the fields easier -->
  <xsl:variable name="uri"       select="documentinfo/uri"/>
  <!-- Folder is the decoded path with its last `/segment` removed -->
  <xsl:variable name="folder"    select="replace($uri/@decodedpath, '/[^/]*?$', '')"/>
  <xsl:variable name="mediatype" select="$uri/@mediatype"/>

  <!-- Core fields -->
  <xsl:sequence select="psf:ps-id($psid-field-name, $psid-field-value)"/>
  <xsl:sequence select="psf:ps-title($uri/displaytitle)"/>
  <xsl:sequence select="psf:ps-type(if (ends-with($mediatype, 'folder')) then 'folder' else if (@version != 'current') then 'documentversion' else 'document')"/>
  <xsl:sequence select="psf:ps-subtype(psf:to-subtype(if (@version != 'current') then 'documentversion' else $mediatype))"/>
  <xsl:sequence select="psf:ps-mediatype($mediatype)"/>
  <!-- some very old files may not have a date; the modified date wins over the created date -->
  <xsl:if test="$uri/@created or $uri/@modified">
    <xsl:sequence select="psf:ps-modifieddate(xs:dateTime(if ($uri/@modified) then $uri/@modified else $uri/@created))"/>
  </xsl:if>

  <!-- Document fields -->
  <!-- The filename is whatever follows the folder and its trailing `/` in the decoded path -->
  <xsl:sequence select="psf:ps-filename(substring($uri/@decodedpath, string-length($folder)+2))"/>
  <xsl:sequence select="psf:ps-folder($folder)"/>
  <xsl:sequence select="psf:ps-size($uri/@size)"/>
  <xsl:sequence select="psf:ps-docid($uri/@docid)"/>
  <xsl:sequence select="psf:ps-description($uri/description)"/>
  <xsl:sequence select="psf:ps-labels($uri/labels)"/>
  <!-- Owned when the path starts with `/ps/[group]/`, hyphens in the group name being mapped to `/` -->
  <xsl:sequence select="psf:ps-owned(starts-with($uri/@path, concat('/ps/',translate($groupname,'-','/'),'/')))"/>

  <!-- Versions info: the latest version is the one with the highest numeric ID -->
  <xsl:variable name="latestversion" select="documentinfo/versions/version[@id = max(../version/xs:integer(@id))]" />
  <!-- Leave an empty field so documents with no version can be searched -->
  <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="pslatestversion" doc-values="sorted"><xsl:value-of select="$latestversion/@name" /></field>
  <xsl:if test="$latestversion">
    <xsl:sequence select="psf:ps-latestversiondate($latestversion/@created)"/>
  </xsl:if>

  <!-- Created date, use version's if we're indexing one -->
  <xsl:choose>
    <xsl:when test="@version != 'current'">
      <xsl:variable name="version" select="documentinfo/versions/version[@name = current()/@version]"/>
      <xsl:sequence select="psf:ps-createddate($version/@created)"/>
      <!-- used by DeleteReleaseQuery.java -->
      <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psversionid" doc-values="sorted"><xsl:value-of select="$version/@id" /></field>
    </xsl:when>
    <xsl:when test="$uri/@created">
      <xsl:sequence select="psf:ps-createddate($uri/@created)"/>
    </xsl:when>
  </xsl:choose>

  <!--
    XRef counts. The `//` paths here and below start at the root of the source tree,
    so they cover the whole input and not only the subtree of this element.
  -->
  <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psxrefcount" numeric-type="int" doc-values="sorted"><xsl:value-of select="count(//xref | //blockxref)"/></field>
  <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psreversexrefcount" numeric-type="int" doc-values="sorted"><xsl:value-of select="count(//reversexref)"/></field>

  <!--
    Label fields. Each comma-separated token is normalized BEFORE blanks are dropped
    and duplicates removed, so that values such as "a, a" or "a, ,b" cannot produce
    duplicate or empty fields.
  -->
  <!-- XRef labels -->
  <xsl:for-each select="distinct-values(for $label in tokenize(string-join(//xref/@labels|//blockxref/@labels, ','), ',') return normalize-space($label))[. != '']">
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psxreflabel" doc-values="sorted-set"><xsl:value-of select="." /></field>
  </xsl:for-each>
  <xsl:for-each select="distinct-values(for $label in tokenize(string-join(//reversexref/@labels, ','), ',') return normalize-space($label))[. != '']">
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psreversexreflabel" doc-values="sorted-set"><xsl:value-of select="." /></field>
  </xsl:for-each>
  <!-- XRef types -->
  <xsl:for-each select="distinct-values(//xref/@type|//blockxref/@type)">
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psxreftype" doc-values="sorted-set"><xsl:value-of select="." /></field>
  </xsl:for-each>
  <xsl:for-each select="distinct-values(//reversexref/@forwardtype)">
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psreversexreftype" doc-values="sorted-set"><xsl:value-of select="." /></field>
  </xsl:for-each>
  <!-- Image labels -->
  <xsl:for-each select="distinct-values(for $label in tokenize(string-join(//image/@labels, ','), ',') return normalize-space($label))[. != '']">
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psimagelabel" doc-values="sorted-set"><xsl:value-of select="." /></field>
  </xsl:for-each>
  <!-- Link labels -->
  <xsl:for-each select="distinct-values(for $label in tokenize(string-join(//link/@labels, ','), ',') return normalize-space($label))[. != '']">
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="pslinklabel" doc-values="sorted-set"><xsl:value-of select="." /></field>
  </xsl:for-each>
  <!-- Metadata property names, in sorted order -->
  <xsl:for-each select="distinct-values(for $name in tokenize(string-join(//metadata/*//property[@name]/@name, ','), ',') return normalize-space($name))[. != '']">
    <xsl:sort select="." />
    <field store="true" index="docs-and-freqs-and-positions-and-offsets" tokenize="false" name="psmetadataname" doc-values="sorted-set"><xsl:value-of select="." /></field>
  </xsl:for-each>

  <!-- PageSeeder Metadata -->
  <xsl:for-each select="metadata//property">
    <xsl:sequence select="psf:ps-metadata(.)"/>
  </xsl:for-each>

</xsl:template>

<!--
  Generate the fields from the workflow.

  ```
  <workflow status="[status]" due="[ISO8601]" priority="[priority]" statuschanged="[ISO8601]">
    <assignedto>
      <fullname></fullname>
    </assignedto>
  </workflow>
  ```
-->
<xsl:template match="workflow" mode="ixml">
  <!-- Field order: status, due date, priority, assignee name, assignee ID, status change date -->
  <xsl:variable name="assignee" select="assignedto"/>
  <xsl:sequence select="psf:ps-status(@status)"/>
  <xsl:sequence select="psf:ps-duedate(@due)"/>
  <xsl:sequence select="psf:ps-priority(@priority)"/>
  <xsl:sequence select="psf:ps-assignedto($assignee/fullname)"/>
  <!-- The assignee ID field is only generated when the source carries an ID -->
  <xsl:sequence select="if ($assignee/@id) then psf:ps-assignedtoid($assignee/@id) else ()"/>
  <xsl:sequence select="psf:ps-statuschangeddate(@statuschanged)"/>
</xsl:template>

<!--
  Generate the fields from content extracted by Tika.

  By default, this template will
   - retrieve the content from the body
   - store any metadata as `x-*` fields from the `<meta/>` tags in the header.

  Typically, this template can be extended to ignore or deal with some fields
  in a specific manner.

  ```
  <content source="tika">
    <html xmlns="http://www.w3.org/1999/xhtml">
      <head> ... </head>
      <body> ... </body>
    </html>
  </content>
  ```
-->
<xsl:template match="content[@source='tika']" mode="ixml">
  <!--
    Emits the fields for Tika-extracted content:
     - the content field, built from the XHTML `body`
     - media created/modified dates, for OOXML media types only
     - one field per `meta` header, only when `$ps-indexXFields` is 'true'
  -->

  <!-- Content extracted by Tika -->
  <xsl:sequence select="psf:ps-content-xhtml(descendant::xhtml:body)"/>

  <xsl:for-each select="descendant::xhtml:head">

    <!-- OOXML fields: the media type is read from the root `index-data` element -->
    <xsl:if test="starts-with(/index-data/@mediatype, 'application/vnd.openxmlformats-officedocument')">

      <!-- Media dates -->
      <xsl:sequence select="psf:psmedia-createddate(xhtml:meta[@name='dcterms:created']/@content)" />
      <xsl:sequence select="psf:psmedia-modifieddate(xhtml:meta[@name='dcterms:modified']/@content)" />

    </xsl:if>

    <!-- Extra metadata reported by Tika: the switch is tested once rather than for every `meta` -->
    <xsl:if test="$ps-indexXFields = 'true'">
      <!--
        `X-Parsed-By` and `Content-Type` are skipped. Names are compared in lower case
        because the content type header may be spelled `Content-Type` or `Content-type`.
      -->
      <xsl:for-each select="xhtml:meta[not(lower-case(@name) = ('x-parsed-by', 'content-type'))]">
        <xsl:sort select="lower-case(@name)"/>
        <xsl:sequence select="psf:x-field(@name, @content)"/>
      </xsl:for-each>
    </xsl:if>

  </xsl:for-each>
</xsl:template>

<!--
  Generate the fields from file content.

  By default, the indexed content is a concatenation of all the text nodes.

  Typically, this template can be extended to process the content differently.

  ```
  <content source="file">
    ...
  </content>
  ```
-->
<xsl:template match="content[@source='file']" mode="ixml">
  <!-- All descendant text nodes are joined with single spaces, then whitespace is collapsed -->
  <xsl:variable name="text" select="string-join(descendant::text(), ' ')"/>
  <xsl:sequence select="psf:ps-content(normalize-space($text))"/>
</xsl:template>


<!-- CUSTOM FIELDS FALLBACK TEMPLATE ====================================== -->

<!--
    Override this template in the XSLT module for your mediatype to add
    custom fields for whole document or document sections using the following
    format:

    <field store="true" name="my_field">field contents</field>

    Custom fields should not start with `ps` as it is reserved for PageSeeder
    fields.

    @context index-data
 -->
<xsl:template name="custom-fields">
  <!-- Intentionally empty: no custom field is generated unless this template is overridden -->
</xsl:template>

</xsl:transform>