#!/bin/sh

# Date: Sun, 21 Dec 2003 22:16:07 +1100
# From: Lachlan Andrew <lha@users.sourceforge.net>
# To: jean-pierre ollier <ollierjp@hotmail.com>, htdig-dev@lists.sourceforge.net
# Subject: [htdig-dev] Re: htdig & xml
#
# Greetings,
#
# Have you had any luck with this?
#
# The attached script is a very basic "external converter".  You would 
# use a line like
#
# external_parser:  application/star-office->text/html
#
# It discards all formatting, but should capture the essence of the 
# documents.
#
# Let me know how you get on,
# Lachlan
#
# On Tue, 25 Nov 2003 09:12, jean-pierre ollier wrote:
# > How do i index xml documents (staroffice, openoffice). do you have
# > a special parser?


# sxw2html FILE [IGNORED-ARGS...]
#
# Crude converter from a zipped XML office document to HTML.  FILE must be
# a zip archive containing a member named content.xml.  The newlines in
# content.xml are turned into spaces, every <...> tag is replaced by a single
# space, and the remaining text is written to stdout between "<html>" and
# "</html>".  Arguments after FILE are accepted but not used.
#
# Exit status:
#   0  converted text was written to stdout
#   1  FILE is unreadable, could not be copied/unpacked, or has no content.xml
#   2  FILE argument missing
#
# Nothing is written to stdout on failure; diagnostics go to stderr.
#
# The body is a subshell "( ... )" rather than "{ ... }" so that the helper
# variables, the signal trap and the "exit" calls stay local to the function.
sxw2html() (
  prog=${0##*/}

  if [ "$#" -lt 1 ] || [ -z "$1" ]; then
    printf 'Usage: %s file\n' "$prog" >&2
    exit 2
  fi

  if [ ! -r "$1" ]; then
    printf '%s: cannot read input file: %s\n' "$prog" "$1" >&2
    exit 1
  fi

  # mktemp creates a private directory with an unpredictable name; a fixed
  # name such as /tmp/sxw2html<pid> can be pre-created by another user.
  workdir=$(mktemp -d "${TMPDIR:-/tmp}/sxw2html.XXXXXX") || {
    printf '%s: cannot create temporary directory\n' "$prog" >&2
    exit 1
  }
  # Remove the scratch directory if we are interrupted part-way through.
  trap 'rm -rf -- "$workdir"; exit 1' HUP INT TERM

  rc=0
  # The document is copied to a fixed name so that unzip is never handed the
  # caller's path (which may start with "-" or contain wildcard characters).
  # Only content.xml is extracted; the rest of the archive is not needed.
  # The inner subshell keeps the "cd" from affecting anything else.
  if cp -- "$1" "$workdir/doc.zip" &&
    (cd "$workdir" && unzip -q doc.zip content.xml) >/dev/null; then
    echo "<html>"
    tr '\012' ' ' <"$workdir/content.xml" | sed 's/<[^>]*>/ /g'
    echo "</html>"
  else
    printf '%s: cannot extract content.xml from %s\n' "$prog" "$1" >&2
    rc=1
  fi

  rm -rf -- "$workdir"
  exit "$rc"
)

# Keep this as the last command so the script's exit status is the
# conversion's exit status.
sxw2html "$@"

