Digital humanities


Maintained by: David J. Birnbaum (djbpitt@gmail.com) [Creative Commons BY-NC-SA 3.0 Unported License] Last modified: 2021-04-30T16:48:35+0000


Test #7: SVG

The Task

Your second and third SVG assignments asked you to generate a graph of presidential election results, by using an XSLT document to transform XML to SVG. This task is similar, except the input is XML from the Edgar Allan Poe project, completed for this course in Spring 2019.

Your SVG must be valid and contain the following:

  1. Bars for each of the five poems in the input XML, whose heights correspond to the number of "open" words in their given poems. Bars must grow up from the X axis, not down from the top.
  2. Labels! We should know what we’re looking at when we see your output, so be sure to label the graph itself and the X and Y axes. Bars should also be labeled with the name of the corresponding poem. (Hint: The input <title> element contains the full title of the poem in a legible format. For long titles, try rotating the text using @transform or render the text from top to bottom instead of left to right.)
  3. Ruling lines with corresponding values listed along the Y axis. These lines should somehow look different from the axes (e.g., a different opacity, or a dashed line instead of a solid line).

Solution

We give one possible solution below, but your XSLT doesn’t need to look exactly like ours, as long as it has the necessary components and uses variables to avoid hard coding the SVG attribute values where possible.

<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="http://www.w3.org/2000/svg"
  xmlns:math="http://www.w3.org/2005/xpath-functions/math" exclude-result-prefixes="#all"
  version="3.0">
  <xsl:output method="xml" indent="yes"/>
  <!-- ========================================= -->
  <!-- Horizontal layout (stylesheet variables)  -->
  <!-- ========================================= -->
  <!-- Width of a single bar -->
  <xsl:variable name="bar_width" as="xs:double" select="25"/>
  <!-- Gap between bars: derived from $bar_width so the two scale together -->
  <xsl:variable name="spacing" as="xs:double" select="$bar_width div 2"/>
  <!-- Length of the X axis: a leading gap, then one bar plus one gap per poem -->
  <xsl:variable name="max_width" as="xs:double"
    select="$spacing + count(//poem) * ($bar_width + $spacing)"/>
  <!-- ========================================= -->
  <!-- Vertical layout (stylesheet variables)    -->
  <!-- ========================================= -->
  <!-- Largest number of elements with @value = 'open' found in any one poem -->
  <xsl:variable name="max_count" as="xs:double"
    select="max(for $poem in //poem return count($poem/descendant::*[@value = 'open']))"/>
  <!-- Vertical units drawn per counted element; change here to stretch or shrink the Y axis -->
  <xsl:variable name="y_scale" as="xs:double" select="5"/>
  <!-- ========================================= -->
  <!-- Templates                                 -->
  <!-- ========================================= -->
  <!-- Root template: emits the <svg> wrapper and draws the chart components in z-order
       (title, ruling lines, bars, axes), so that later components mask earlier ones
       wherever they overlap. -->
  <xsl:template match="/">
    <svg height="450" width="500">
      <!-- Shift the origin so that y = 0 is the X axis. Everything above the axis is then
           plotted with a negative y value, which is how the bars grow upward. -->
      <g transform="translate(100, {$max_count * $y_scale + 100})">
        <!-- ============================= -->
        <!-- Main title                    -->
        <!-- ============================= -->
        <!-- Centered over the plot area, 50 units above the top of the Y axis -->
        <text x="{$max_width div 2}" y="-{$max_count * $y_scale + 50}" text-anchor="middle"
          >Open Language Across Poems</text>
        <!-- ============================= -->
        <!-- Ruling lines                  -->
        <!-- ============================= -->
        <!-- One ruling line (and its label) at every multiple of 10 up to $max_count -->
        <xsl:for-each select="0 to $max_count idiv 10">
          <xsl:variable name="y_count" select=". * 10"/>
          <xsl:variable name="y_pos" as="xs:double" select="$y_count * $y_scale"/>
          <line x1="0" y1="-{$y_pos}" x2="{$max_width}" y2="-{$y_pos}" stroke="lightgray"/>
          <!-- Label is right-aligned just left of the Y axis and vertically centered on the line -->
          <text x="-10" y="-{$y_pos}" text-anchor="end" dominant-baseline="middle"
            fill="gray" font-size="smaller">
            <xsl:value-of select="$y_count"/>
          </text>
        </xsl:for-each>
        <!-- ============================= -->
        <!-- Poems                         -->
        <!-- ============================= -->
        <!-- Bars and their labels are produced by the template that matches poem.
             The instruction has no children, so it must be self-closing; leaving the
             start tag open makes the whole stylesheet ill-formed. -->
        <xsl:apply-templates select="//poem"/>
        <!-- ============================= -->
        <!-- X axis and label              -->
        <!-- ============================= -->
        <!-- Axes are drawn after the bars so that they overlap the bottom of the bars -->
        <line x1="0" x2="{$max_width}" y1="0" y2="0" stroke="black" stroke-linecap="square"/>
        <!-- y = 100 is a hard-coded offset that places the label below the rotated poem titles -->
        <text x="{$max_width div 2}" y="100" text-anchor="middle">Poem</text>
        <!-- ============================= -->
        <!-- Y axis and label              -->
        <!-- ============================= -->
        <line x1="0" x2="0" y1="0" y2="-{$max_count * $y_scale}" stroke="black"
          stroke-linecap="square"/>
        <!-- Label is anchored at the vertical midpoint of the Y axis and rotated about
             that same point so that it reads bottom to top -->
        <text x="-50" y="{(-$max_count * $y_scale) div 2}"
          transform="rotate(270, -50, {(-$max_count * $y_scale) div 2})"
          text-anchor="middle">Word Count</text>
      </g>
    </svg>
  </xsl:template>
  <!-- Draws everything that belongs to a single poem: its bar, the count printed above
       the bar, and the rotated title below the X axis. -->
  <xsl:template match="poem">
    <!-- ===================================== -->
    <!-- Per-poem geometry                     -->
    <!-- ===================================== -->
    <!-- Left edge of the bar; position() is this poem's place in the sequence being processed -->
    <xsl:variable name="bar_left" as="xs:double"
      select="$spacing + ($bar_width + $spacing) * (position() - 1)"/>
    <!-- Horizontal midpoint of the bar, used to center the count label -->
    <xsl:variable name="bar_middle" as="xs:double" select="$bar_left + $bar_width div 2"/>
    <!-- Number of descendant elements marked @value = 'open' -->
    <xsl:variable name="open_total" as="xs:integer"
      select="count(descendant::*[@value = 'open'])"/>
    <xsl:variable name="bar_height" as="xs:double" select="$y_scale * $open_total"/>
    <!-- Baseline of the title, which is also the pivot of its rotation -->
    <xsl:variable name="title_y" as="xs:integer" select="15"/>
    <!-- ===================================== -->
    <!-- Bar                                   -->
    <!-- ===================================== -->
    <!-- y is negative so that the bar rises from the X axis (y = 0) -->
    <rect x="{$bar_left}" width="{$bar_width}" y="-{$bar_height}" height="{$bar_height}" fill="red"/>
    <!-- ===================================== -->
    <!-- Count above the bar                   -->
    <!-- ===================================== -->
    <text x="{$bar_middle}" y="-{$bar_height + 5}" text-anchor="middle"
      font-size="smaller" fill="gray">
      <xsl:value-of select="$open_total"/>
    </text>
    <!-- ===================================== -->
    <!-- Title below the axis                  -->
    <!-- ===================================== -->
    <!-- Rotated 30 degrees about its own anchor point so that long titles fit -->
    <text x="{$bar_left}" y="{$title_y}" text-anchor="start" font-size="x-small"
      transform="rotate(30, {$bar_left}, {$title_y})">
      <xsl:value-of select="meta/title"/>
    </text>
  </xsl:template>
</xsl:stylesheet>

Our output looks like the following:

Open Language Across Poems 0 10 20 30 40 7 A DREAM WITHIN A DREAM 25 ANNABEL LEE 7 ELDORADO 21 THE CITY IN THE SEA. 47 THE RAVEN Poem Word Count

Discussion

General organization

We begin by creating stylesheet variables, that is, variables that we want to be available anywhere in the stylesheet (these are sometimes also called global variables). Stylesheet variables are different from template variables, which are declared inside templates and are available only within the template where they are created. Stylesheet variables are those that will have the same value no matter what poem we’re looking at, such as the maximum width or maximum height of the graph. Template variables will be different for each poem, such as the number of "open" elements in a particular poem or the X and Y coordinates at which we plot a poem-specific bar and label.

We then draw the components according to their z-order, that is, so that where they overlap, the more recent component will mask the earlier one. For example, where ruling lines and bars cross, we want the bars to mask the ruling lines; we don’t want the ruling lines to cross over the bars. This means that we draw, in order, ruling lines, bars, and axes (the axes overlap the bottom of the bars). Some components, such as most of the labels, do not overlap with anything, and could therefore be drawn at any time.

Stylesheet variables

We create stylesheet variables that represent the width of our bars and the space between the bars. The spacing is a function of the width, so that if we later want to change the width, the spacing will change proportionately, and we won’t have to change an additional value manually. We use those variables to calculate the maximum width of the graph by finding the number of poems in the document and multiplying to determine how much horizontal space their bars and interbar spacing would take up. The $max_width value becomes the width of the X axis. We calculate the maximum height by finding the highest count of "open" elements in a single poem, which we store in $max_count. We then multiply this value by $y_scale to arrive at the height of the Y axis. Specifying a variable to scale the Y axis, instead of using the same fixed number repeatedly during our plotting, means that if we later decide we want to stretch or shrink the Y dimension, we can change just the variable and all of the Y positions in our graph will adjust themselves automatically, in a synchronized, coordinated way.

SVG text objects do not know their own size. For example, in our solution the poem titles are rendered diagonally under the bars, but SVG does not provide an easy way of computing the X and Y position of the ends of the titles. This, in turn, makes it difficult to compute the total height of the SVG graph, which we need for the @height attribute on the <svg> element (and also to position the X-axis label, which reads Poem, under the poem titles). In this case we found an appropriate value by trial and error and hard-coded it. With a stand-alone SVG document we don’t care if it’s too long, but because we wanted to be able to embed our SVG in a web page, we needed to set a @height value that would avoid having too much or too little vertical whitespace at the bottom of the SVG, before the continuation of the text.

Ruling lines and their labels

We use <xsl:for-each select="0 to $max_count idiv 10"> to plot ruling lines at Y values that are multiples of 10. The idiv operator performs integer division, that is, whole-number division. How many ruling lines to plot and where to plot them are up to you, but we found that multiples of ten were close enough to be informative while also spread out enough not to be distracting, and multiples of ten are whole-number values with which humans are likely to be comfortable. We plot the ruling lines in light gray to help foreground the data; the light gray lines are simultaneously unobtrusive and sufficiently visible to help users understand the bar height. We used the @text-anchor attribute to align the Y labels at their right sides, and the @dominant-baseline attribute to center them at their Y positions. We also made the labels gray and used a small font to reduce their obtrusiveness as a way of foregrounding the actual data. We found that using the color "lightgray" was appropriate for the ruling lines, but too light for the text, so we made the text "gray".

Generating bars

To produce the bars (without which there would be no bar graph), we apply templates to the <poem> elements. The template first computes template variables, that is, variables that have different values for each poem, and then plots a rectangle, a poem title, and the count of "open" elements for that poem. The X position of poem-specific items is based on their offset in the sequence of <poem> elements to which we’re applying templates, so that the first poem has a position() value of 1, the second poem of 2, etc. We used gray and smaller text for the labels and we rotated the titles to help them fit better into the graph.

Optional bonus

One of your optional bonus tasks was to add the data for "closed" words into the same graph. We did this by creating a grouped bar chart (see below). You may have chosen a different visualisation, which is fine; your XSLT doesn’t have to look exactly like ours, as long as it represents the information in a way that makes intuitive sense for those who will want to read and understand the graph.

We need to tell SVG to generate two bars for every poem, one for "open" elements and one for "closed" elements. Adding this functionality required the following changes:

  1. We added a $group_width stylesheet variable that was equal to the total horizontal space occupied by each poem, that is, the two bars (one for "open" and one for "closed") plus the interbar spacing.
  2. We modified $max_count to compute, separately, the counts of "open" and "closed" values for each poem, and used the max() function to find the largest of those ten counts.
  3. We used <xsl:sort> to sort the poems and apply templates to them from earliest to latest by date.
  4. Within the template that processed poems we counted both the "open" and "closed" elements, which we used to plot the different heights of the two bars per poem. We modified the computation of the horizontal position to take into consideration that the spacing between poems had to allow for two bars instead of just one.
  5. Since we were sorting by date, we added the date to the label for each pair of bars. We used the XPath format-date() function to turn the ISO date into a human-friendly one, and we used the <tspan> element for the two parts of the label (title, date) so that we could coordinate their positioning more easily. You can read about these features in Kay.
  6. We colored each bar differently so they would be distinguishable and added a legend to indicate which color represented which attribute value. We plotted the legend inside its own <g> (group) element so that we could compute the positions from 0,0, which made for easier arithmetic, and then we moved the entire group by using the SVG translate() function inside the @transform attribute on the <g>.
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns="http://www.w3.org/2000/svg"
    xmlns:math="http://www.w3.org/2005/xpath-functions/math" exclude-result-prefixes="#all"
    version="3.0">
    <xsl:output method="xml" indent="yes"/>
    <!-- ========================================= -->
    <!-- Horizontal layout (stylesheet variables)  -->
    <!-- ========================================= -->
    <!-- Width of a single bar -->
    <xsl:variable name="bar_width" as="xs:double" select="25"/>
    <!-- Gap between poem groups: derived from $bar_width so the two scale together -->
    <xsl:variable name="spacing" as="xs:double" select="$bar_width div 2"/>
    <!-- Horizontal space taken by one poem: its two bars plus the gap that follows them -->
    <xsl:variable name="group_width" as="xs:double" select="$spacing + 2 * $bar_width"/>
    <!-- Length of the X axis: a leading gap, then one group per poem -->
    <xsl:variable name="max_width" as="xs:double" select="$spacing + $group_width * count(//poem)"/>
    <!-- ========================================= -->
    <!-- Vertical layout (stylesheet variables)    -->
    <!-- ========================================= -->
    <!-- Largest single count across all poems, considering 'open' and 'closed' separately -->
    <xsl:variable name="max_count" as="xs:double"
        select="max(for $poem in //poem, $kind in ('open', 'closed') return count($poem/descendant::*[@value = $kind]))"/>
    <!-- Vertical units drawn per counted element; change here to stretch or shrink the Y axis -->
    <xsl:variable name="y_scale" as="xs:double" select="5"/>
    <!-- ========================================= -->
    <!-- Templates                                 -->
    <!-- ========================================= -->
    <!-- Root template for the grouped chart: emits the <svg> wrapper and draws, in z-order,
         the title, ruling lines, bars (poems sorted by date), axes, and the legend. -->
    <xsl:template match="/">
        <!-- Height of the Y axis -->
        <xsl:variable name="axis_height" as="xs:double" select="$max_count * $y_scale"/>
        <!-- Vertical midpoint of the Y axis (negative, because the plot grows upward from y = 0) -->
        <xsl:variable name="axis_mid_y" as="xs:double" select="(-$max_count * $y_scale) div 2"/>
        <svg height="500" width="500">
            <!-- Move the origin so that y = 0 coincides with the X axis -->
            <g transform="translate(100, {$axis_height + 100})">
                <!-- ============================= -->
                <!-- Chart title                   -->
                <!-- ============================= -->
                <text x="{$max_width div 2}" y="-{$axis_height + 50}" text-anchor="middle"
                    >Open Language Across Poems</text>
                <!-- ============================= -->
                <!-- Ruling lines and tick labels  -->
                <!-- ============================= -->
                <!-- Iterate directly over the tick values: 0, 10, 20, ... up to $max_count -->
                <xsl:for-each select="(0 to $max_count idiv 10) ! (. * 10)">
                    <xsl:variable name="tick_value" select="."/>
                    <xsl:variable name="tick_y" as="xs:double" select="$tick_value * $y_scale"/>
                    <line x1="0" y1="-{$tick_y}" x2="{$max_width}" y2="-{$tick_y}" stroke="lightgray"/>
                    <text x="-10" y="-{$tick_y}" text-anchor="end" dominant-baseline="middle"
                        fill="gray" font-size="smaller">
                        <xsl:value-of select="$tick_value"/>
                    </text>
                </xsl:for-each>
                <!-- ============================= -->
                <!-- Bars (one pair per poem)      -->
                <!-- ============================= -->
                <!-- Poems are processed in the order given by their date descendant -->
                <xsl:apply-templates select="//poem">
                    <xsl:sort select="descendant::date"/>
                </xsl:apply-templates>
                <!-- ============================= -->
                <!-- X axis and its label          -->
                <!-- ============================= -->
                <line x1="0" x2="{$max_width}" y1="0" y2="0" stroke="black" stroke-linecap="square"/>
                <text x="{$max_width div 2}" y="100" text-anchor="middle">Poem</text>
                <!-- ============================= -->
                <!-- Y axis and its label          -->
                <!-- ============================= -->
                <line x1="0" x2="0" y1="0" y2="-{$axis_height}" stroke="black"
                    stroke-linecap="square"/>
                <!-- Rotated about its own anchor point so that it reads bottom to top -->
                <text x="-50" y="{$axis_mid_y}"
                    transform="rotate(270, -50, {$axis_mid_y})"
                    text-anchor="middle">Word Count</text>
                <!-- ============================= -->
                <!-- Legend                        -->
                <!-- ============================= -->
                <!-- Laid out from 0,0 inside its own group, then moved into place as a unit -->
                <g transform="translate(200, -250)">
                    <rect x="0" y="0" width="100" height="50" stroke="black" fill="white"/>
                    <rect x="10" y="10" width="10" height="10" fill="red"/>
                    <text x="30" y="15" dominant-baseline="middle">Open</text>
                    <rect x="10" y="30" width="10" height="10" fill="blue"/>
                    <text x="30" y="35" dominant-baseline="middle">Closed</text>
                </g>
            </g>
        </svg>
    </xsl:template>
    <!-- Draws everything that belongs to a single poem in the grouped chart: the 'open'
         bar, the 'closed' bar beside it, a count above each bar, and a two-line rotated
         label (title, then formatted date) below the X axis. -->
    <xsl:template match="poem">
        <!-- ===================================== -->
        <!-- Per-poem geometry                     -->
        <!-- ===================================== -->
        <!-- Left edge of the 'open' bar; position() reflects the sorted processing order -->
        <xsl:variable name="open_left" as="xs:double"
            select="$spacing + $group_width * (position() - 1)"/>
        <!-- Left edge of the 'closed' bar, immediately to the right of the 'open' bar -->
        <xsl:variable name="closed_left" as="xs:double" select="$open_left + $bar_width"/>
        <!-- Horizontal midpoints of the two bars -->
        <xsl:variable name="open_middle" as="xs:double" select="$open_left + $bar_width div 2"/>
        <xsl:variable name="closed_middle" as="xs:double"
            select="$closed_left + $bar_width div 2"/>
        <!-- ===================================== -->
        <!-- Per-poem counts and bar heights       -->
        <!-- ===================================== -->
        <xsl:variable name="open_total" as="xs:integer"
            select="count(descendant::*[@value = 'open'])"/>
        <xsl:variable name="closed_total" as="xs:integer"
            select="count(descendant::*[@value = 'closed'])"/>
        <xsl:variable name="open_bar" as="xs:double" select="$y_scale * $open_total"/>
        <xsl:variable name="closed_bar" as="xs:double" select="$y_scale * $closed_total"/>
        <!-- ===================================== -->
        <!-- 'Open' bar and its count              -->
        <!-- ===================================== -->
        <rect x="{$open_left}" width="{$bar_width}" y="-{$open_bar}" height="{$open_bar}"
            fill="red"/>
        <text x="{$open_middle}" y="-{$open_bar + 5}" text-anchor="middle"
            font-size="smaller" fill="gray">
            <xsl:value-of select="$open_total"/>
        </text>
        <!-- ===================================== -->
        <!-- 'Closed' bar and its count            -->
        <!-- ===================================== -->
        <rect x="{$closed_left}" width="{$bar_width}" y="-{$closed_bar}"
            height="{$closed_bar}" fill="blue"/>
        <text x="{$closed_middle}" y="-{$closed_bar + 5}"
            text-anchor="middle" font-size="smaller" fill="gray">
            <xsl:value-of select="$closed_total"/>
        </text>
        <!-- ===================================== -->
        <!-- Title and date below the axis         -->
        <!-- ===================================== -->
        <!-- Both lines start at the same x; the second tspan resets x and moves down to y = 15 -->
        <text x="{$open_middle}" y="5" text-anchor="start" font-size="x-small"
            transform="rotate(30, {$open_left}, 15)">
            <tspan>
                <xsl:value-of select="meta/title"/>
            </tspan>
            <tspan x="{$open_middle}" y="15">
                <xsl:value-of select="format-date(meta/date, '[MNn] [D], [Y]')"/>
            </tspan>
        </text>
    </xsl:template>
</xsl:stylesheet>

Our output looks like the following:

Open Language Across Poems 0 10 20 30 40 50 21 36 THE CITY IN THE SEA. April 1, 1835 47 57 THE RAVEN January 29, 1845 7 11 A DREAM WITHIN A DREAM March 31, 1849 7 11 ELDORADO April 21, 1849 25 23 ANNABEL LEE October 9, 1849 Poem Word Count Open Closed