Normalizing Excel’s SpreadsheetML using XSLT – Part 2

As reported by one of the comments, there was a bug in the XSLT transformation which « normalizes » Excel’s SpreadsheetML documents that I had posted in a previous post.

I have fixed this bug and the new version is:

<?xml version="1.0"?>
<!--

Adapted from http://ewbi.blogs.com/develops/2004/12/normalize_excel.html

This product may incorporate intellectual property owned by Microsoft Corporation. The terms
and conditions upon which Microsoft is licensing such intellectual property may be found at
http://msdn.microsoft.com/library/en-us/odcXMLRef/html/odcXMLRefLegalNotice.asp.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns="urn:schemas-microsoft-com:office:spreadsheet"
    xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet">
    <xsl:output method="xml" indent="no" encoding="UTF-8"/>
    <xsl:template match="/">
        <xsl:apply-templates select="node()" mode="normalize"/>
    </xsl:template>
    <xsl:template match="@*|node()" mode="normalize">
        <xsl:copy>
            <xsl:apply-templates select="@*|node()" mode="normalize"/>
        </xsl:copy>
    </xsl:template>
    <xsl:template match="ss:Cell/@ss:Index" mode="normalize"/>
    <xsl:template match="ss:Cell" name="copy" mode="normalize">
        <xsl:copy>
            <xsl:apply-templates select="@*" mode="normalize"/>
            <xsl:variable name="prevCells" select="preceding-sibling::ss:Cell"/>
            <xsl:variable name="nbPrecedingIndexes"
                select="count(preceding-sibling::ss:Cell[@ss:Index])"/>
            <xsl:attribute name="ss:Index">
                <xsl:choose>
                    <xsl:when test="@ss:Index">
                        <xsl:value-of select="@ss:Index"/>
                    </xsl:when>
                    <xsl:when test="count($prevCells) = 0">
                        <xsl:value-of select="1"/>
                    </xsl:when>
                    <xsl:when test="$nbPrecedingIndexes > 0">
                        <xsl:variable name="precedingCellsSinceLastIndex"
                            select="preceding-sibling::ss:Cell[count(preceding-sibling::ss:Cell[@ss:Index]|self::ss:Cell[@ss:Index]) = $nbPrecedingIndexes]"/>
                        <xsl:value-of
                            select="preceding-sibling::ss:Cell[@ss:Index][1]/@ss:Index +
                            count($precedingCellsSinceLastIndex)
                            + sum($precedingCellsSinceLastIndex/@ss:MergeAcross)
                            - count ($precedingCellsSinceLastIndex[@ss:MergeAcross])"
                        />
                    </xsl:when>
                    <xsl:otherwise>
                        <xsl:value-of
                            select="count($prevCells) + 1 +
                            sum($prevCells/@ss:MergeAcross) -count($prevCells/@ss:MergeAcross)"
                        />
                    </xsl:otherwise>
                </xsl:choose>
            </xsl:attribute>
            <xsl:apply-templates select="node()" mode="normalize"/>
        </xsl:copy>
    </xsl:template>
</xsl:stylesheet>
            

I have also written the following set of tests (using XSLTUnit):

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:ss="urn:schemas-microsoft-com:office:spreadsheet" xmlns:exsl="http://exslt.org/common"
  extension-element-prefixes="exsl" xmlns:xsltu="http://xsltunit.org/0/"
  exclude-result-prefixes="exsl">
  <xsl:import href="excelNormalize.xsl"/>
  <xsl:import href="xsltunit.xsl"/>
  <xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes"/>
  <xsl:template match="/">
    <xsltu:tests>
      <xsltu:test id="noIndexes">
        <xsl:variable name="input">
          <ss:Row>
            <ss:Cell>A</ss:Cell>
            <ss:Cell>B</ss:Cell>
            <ss:Cell>C</ss:Cell>
            <ss:Cell>D</ss:Cell>
            <ss:Cell>E</ss:Cell>
          </ss:Row>
        </xsl:variable>
        <xsl:call-template name="xsltu:assertEqual">
          <xsl:with-param name="id">noIndexes</xsl:with-param>
          <xsl:with-param name="nodes1">
            <xsl:apply-templates select="exsl:node-set($input)/ss:Row" mode="normalize"/>
          </xsl:with-param>
          <xsl:with-param name="nodes2">
            <ss:Row>
              <ss:Cell ss:Index="1">A</ss:Cell>
              <ss:Cell ss:Index="2">B</ss:Cell>
              <ss:Cell ss:Index="3">C</ss:Cell>
              <ss:Cell ss:Index="4">D</ss:Cell>
              <ss:Cell ss:Index="5">E</ss:Cell>
            </ss:Row>
          </xsl:with-param>
        </xsl:call-template>
      </xsltu:test>
      <xsltu:test id="withIndexes">
        <xsl:variable name="input">
          <ss:Row>
            <ss:Cell ss:Index="1">A</ss:Cell>
            <ss:Cell ss:Index="2">B</ss:Cell>
            <ss:Cell ss:Index="3">C</ss:Cell>
            <ss:Cell ss:Index="4">D</ss:Cell>
            <ss:Cell ss:Index="5">E</ss:Cell>
          </ss:Row>
        </xsl:variable>
        <xsl:call-template name="xsltu:assertEqual">
          <xsl:with-param name="id">withIndexes</xsl:with-param>
          <xsl:with-param name="nodes1">
            <xsl:apply-templates select="exsl:node-set($input)/ss:Row" mode="normalize"/>
          </xsl:with-param>
          <xsl:with-param name="nodes2" select="$input"/>
        </xsl:call-template>
      </xsltu:test>
      <xsltu:test id="firstIndex">
        <xsl:variable name="input">
          <ss:Row>
            <ss:Cell ss:Index="5">A</ss:Cell>
            <ss:Cell>B</ss:Cell>
            <ss:Cell>C</ss:Cell>
            <ss:Cell>D</ss:Cell>
            <ss:Cell>E</ss:Cell>
          </ss:Row>
        </xsl:variable>
        <xsl:call-template name="xsltu:assertEqual">
          <xsl:with-param name="id">firstIndex</xsl:with-param>
          <xsl:with-param name="nodes1">
            <xsl:apply-templates select="exsl:node-set($input)/ss:Row" mode="normalize"/>
          </xsl:with-param>
          <xsl:with-param name="nodes2">
            <ss:Row>
              <ss:Cell ss:Index="5">A</ss:Cell>
              <ss:Cell ss:Index="6">B</ss:Cell>
              <ss:Cell ss:Index="7">C</ss:Cell>
              <ss:Cell ss:Index="8">D</ss:Cell>
              <ss:Cell ss:Index="9">E</ss:Cell>
            </ss:Row>
          </xsl:with-param>
        </xsl:call-template>
      </xsltu:test>
      <xsltu:test id="altIndexes">
        <xsl:variable name="input">
          <ss:Row>
            <ss:Cell ss:Index="2">A</ss:Cell>
            <ss:Cell>B</ss:Cell>
            <ss:Cell ss:Index="5">C</ss:Cell>
            <ss:Cell>D</ss:Cell>
            <ss:Cell>E</ss:Cell>
          </ss:Row>
        </xsl:variable>
        <xsl:call-template name="xsltu:assertEqual">
          <xsl:with-param name="id">altIndexes</xsl:with-param>
          <xsl:with-param name="nodes1">
            <xsl:apply-templates select="exsl:node-set($input)/ss:Row" mode="normalize"/>
          </xsl:with-param>
          <xsl:with-param name="nodes2">
            <ss:Row>
              <ss:Cell ss:Index="2">A</ss:Cell>
              <ss:Cell ss:Index="3">B</ss:Cell>
              <ss:Cell ss:Index="5">C</ss:Cell>
              <ss:Cell ss:Index="6">D</ss:Cell>
              <ss:Cell ss:Index="7">E</ss:Cell>
            </ss:Row>
          </xsl:with-param>
        </xsl:call-template>
      </xsltu:test>
      <xsltu:test id="noIndexesMergeAcross">
        <xsl:variable name="input">
          <ss:Row>
            <ss:Cell>A</ss:Cell>
            <ss:Cell ss:MergeAcross="2">B</ss:Cell>
            <ss:Cell>C</ss:Cell>
            <ss:Cell ss:MergeAcross="3">D</ss:Cell>
            <ss:Cell>E</ss:Cell>
          </ss:Row>
        </xsl:variable>
        <xsl:call-template name="xsltu:assertEqual">
          <xsl:with-param name="id">noIndexesMergeAcross</xsl:with-param>
          <xsl:with-param name="nodes1">
            <xsl:apply-templates select="exsl:node-set($input)/ss:Row" mode="normalize"/>
          </xsl:with-param>
          <xsl:with-param name="nodes2">
            <ss:Row>
              <ss:Cell ss:Index="1">A</ss:Cell>
              <ss:Cell ss:MergeAcross="2" ss:Index="2">B</ss:Cell>
              <ss:Cell ss:Index="4">C</ss:Cell>
              <ss:Cell ss:MergeAcross="3" ss:Index="5">D</ss:Cell>
              <ss:Cell ss:Index="8">E</ss:Cell>
            </ss:Row>
          </xsl:with-param>
        </xsl:call-template>
      </xsltu:test>
      <xsltu:test id="withIndexesMergeAcross">
        <xsl:variable name="input">
          <ss:Row>
            <ss:Cell ss:Index="5" ss:MergeAcross="2">A</ss:Cell>
            <ss:Cell>B</ss:Cell>
            <ss:Cell ss:Index="10">C</ss:Cell>
            <ss:Cell ss:MergeAcross="3">D</ss:Cell>
            <ss:Cell>E</ss:Cell>
          </ss:Row>
        </xsl:variable>
        <xsl:call-template name="xsltu:assertEqual">
          <xsl:with-param name="id">withIndexesMergeAcross</xsl:with-param>
          <xsl:with-param name="nodes1">
            <xsl:apply-templates select="exsl:node-set($input)/ss:Row" mode="normalize"/>
          </xsl:with-param>
          <xsl:with-param name="nodes2">
            <ss:Row>
              <ss:Cell ss:Index="5" ss:MergeAcross="2">A</ss:Cell>
              <ss:Cell ss:Index="7">B</ss:Cell>
              <ss:Cell ss:Index="10">C</ss:Cell>
              <ss:Cell ss:MergeAcross="3" ss:Index="11">D</ss:Cell>
              <ss:Cell ss:Index="14">E</ss:Cell>
            </ss:Row>
          </xsl:with-param>
        </xsl:call-template>
      </xsltu:test>
    </xsltu:tests>
  </xsl:template>
</xsl:stylesheet>
            

These tests should help to understand what this transformation is doing.

Thanks continue to report bugs and feature requests as comments.

5 thoughts on “Normalizing Excel’s SpreadsheetML using XSLT – Part 2”

  1. I get this error in Netbeans 6.5.1 when I run the code that I will also paste in this comment.
    file:/H:/NetBeansProjects/Roosterdiensten/src/nl/prhc/roosterdiensten/resources/normalize-excel-sheet.xslt: line 29: Attribute ‘ss:Index’ outside of element.

    Code:
    File xmlFile = new File(« H:\\TEMP\\rooster-sal.xml »);
    File xsltFile = new File(« H:\\NetBeansProjects\\Roosterdiensten\\src\\nl\\prhc\\roosterdiensten\\resources\\normalize-excel-sheet.xslt »);

    // JAXP reads data using the Source interface
    Source xmlSource = new StreamSource(xmlFile);
    Source xsltSource = new StreamSource(xsltFile);

    // the factory pattern supports different XSLT processors
    TransformerFactory transFact =
    TransformerFactory.newInstance();
    Transformer trans;
    try {
    trans = transFact.newTransformer(xsltSource);
    trans.transform(xmlSource, new StreamResult(new File(« H:\\TEMP\\rooster-sal-out.xml »)));
    } catch (TransformerException ex) {
    Logger.getLogger(RoosterdienstenView.class.getName()).log(Level.SEVERE, null, ex);
    }

    }

  2. Thanks very much for this.
    Would of loved to add empty Cells instead of having ss:Index.
    A bit like after you convert to CSV or tab delimited.
    Most of my excel would grow in size of course, but once the transformation is done, it is much easier to script afterwards.

Laisser un commentaire

Votre adresse e-mail ne sera pas publiée. Les champs obligatoires sont indiqués avec *