<!--
  Indexing transformations for PDF.

  The source XML follows the structure below:

  ```
    <index-data mediatype="application/pdf">
      <document level="metadata"> ... </document>
      <workflow> ... </workflow>
      <content source="tika"> ... </content>
    </index-data>
  ```

  @author Philip Rutherford

  @since 5.9400
-->
<xsl:transform  xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0"
                xmlns:psf="http://www.pageseeder.com/function"
                xmlns:xhtml="http://www.w3.org/1999/xhtml"
                xmlns:xs="http://www.w3.org/2001/XMLSchema"
                exclude-result-prefixes="#all">

<!-- We extend the default index module -->
<xsl:import href="index.xsl" />

<!--
  indexXFields group property.

  Stylesheet parameter holding the property as a string; it defaults to 'false'.
  The PDF content template only emits the metadata reported by Tika as x-fields
  when this value is exactly the string 'true'.
-->
<xsl:param name="ps-indexXFields" select="'false'" />

<!--
  Produces the PDF-specific index fields from the XHTML content extracted by Tika.

  Expected input:

  ```
  <content source="tika">
    <html xmlns="http://www.w3.org/1999/xhtml">
      <head>
        <meta name="xmpTPg:NPages" content="xxx"/>
        ...
      </head>
      <body>
        [text content]
      </body>
    </html>
  </content>
  ```

  Output, in order:
    1. the result of `psf:ps-content-xhtml` applied to the XHTML body;
    2. for each XHTML head:
       a. one `pspagecount` field per `xmpTPg:NPages` meta element;
       b. the results of `psf:psmedia-createddate` and `psf:psmedia-modifieddate`
          applied to the `pdf:docinfo:created` and `pdf:docinfo:modified` values;
       c. when `$ps-indexXFields` is 'true', one `psf:x-field` result per remaining
          meta element, ordered by lower-cased name.
-->
<xsl:template match="index-data[@mediatype='application/pdf']/content" mode="ixml" priority="2">

  <!-- Body content extracted by Tika -->
  <xsl:sequence select="psf:ps-content-xhtml(.//xhtml:body)"/>

  <xsl:for-each select=".//xhtml:head">
    <!-- Every meta element directly under the current head -->
    <xsl:variable name="meta" select="xhtml:meta"/>

    <!-- Page count: one field for each matching meta, valued with its @content -->
    <xsl:for-each select="$meta[@name = 'xmpTPg:NPages']">
      <field name="pspagecount"
             store="true"
             index="docs-and-freqs-and-positions-and-offsets"
             tokenize="false"
             numeric-type="int"
             doc-values="sorted">
        <xsl:value-of select="@content"/>
      </field>
    </xsl:for-each>

    <!-- Media dates: creation first, then modification -->
    <xsl:sequence select="psf:psmedia-createddate($meta[@name = 'pdf:docinfo:created']/@content)"/>
    <xsl:sequence select="psf:psmedia-modifieddate($meta[@name = 'pdf:docinfo:modified']/@content)"/>

    <!--
      Metadata reported by Tika, minus the two names excluded below.
      NOTE(review): the name comparison is case-sensitive, so a meta element named
      'Content-Type' would not be excluded here; confirm the casing Tika emits.
    -->
    <xsl:variable name="reported" select="$meta[not(@name = ('X-Parsed-By', 'Content-type'))]"/>

    <!-- Only emitted when the indexXFields property is 'true'; sorted by lower-cased name -->
    <xsl:for-each select="$reported[$ps-indexXFields = 'true']">
      <xsl:sort select="lower-case(@name)"/>
      <xsl:sequence select="psf:x-field(@name, @content)"/>
    </xsl:for-each>

  </xsl:for-each>

</xsl:template>

</xsl:transform>
