#!/usr/bin/python2.2

import xml.parsers.expat
import sys, string, re

class xml2wiki:
    """Convert a DocBook XML document into wiki markup text.

    The document is parsed with expat.  For every element ``foo`` the
    methods ``start_foo(attrs)`` and ``end_foo()`` are called when they
    exist; elements without a handler are made visible in the output as
    ``<<<foo:`` ... ``:foo>>>`` so that missing handlers are easy to spot.
    Elements listed in ``ignorelist`` are dropped together with everything
    nested inside them.

    NOTE(review): the emitted markup ({{{...}}}, ''...'', attachment:)
    looks like MoinMoin syntax -- confirm against the target wiki.

    infile  -- path of the XML file to read; None means read sys.stdin
    outfile -- path of the file to write; None means write to sys.stdout
    """

    def __init__(self, infile=None, outfile=None):
        self.infile = infile
        self.outfile = outfile
        self.out = None  # output stream; set by parse()
        self.p = xml.parsers.expat.ParserCreate()
        self.p.StartElementHandler = self.start_element
        self.p.EndElementHandler = self.end_element
        self.p.DefaultHandler = self.default_handler
        self.p.CharacterDataHandler = self.char_data
        self.level = 0      # nesting depth of article/sect1/sect2
        self.listlevel = 0  # nesting depth of variablelist
        self.ignore = 0     # > 0 while inside an element from ignorelist
        self.ignorelist = ['articleinfo', 'indexterm', 'application']
        self.lastchars = ""  # tail (at most 2 chars) of the last text written by char_data
        self.vt = 0         # > 0 while inside verbatim (programlisting/screen) content

    def feed(self, data):
        """Pass a piece of the XML document to the parser."""
        self.p.Parse(data, 0)

    def close(self):
        """Tell the parser that the document is complete and drop it."""
        self.p.Parse("", 1)  # end of data
        del self.p  # get rid of circular references

    # sections, lists, paragraphs
    def start_article(self, attrs): self.level += 1
    def end_article(self): self.level -= 1
    def start_sect1(self, attrs): self.level += 1
    def end_sect1(self): self.level -= 1
    def start_sect2(self, attrs): self.level += 1
    def end_sect2(self): self.level -= 1
    def start_variablelist(self, attrs): self.listlevel += 1
    def end_variablelist(self): self.listlevel -= 1
    def start_varlistentry(self, attrs): self.write(" " * self.listlevel + "* ")
    def end_varlistentry(self): pass
    def start_listitem(self, attrs): self.write(" " * self.listlevel + " ")
    def end_listitem(self): pass
    # a title is wrapped in as many "=" as the current section depth
    def start_title(self, attrs): self.write("\n" + "=" * self.level + " ")
    def end_title(self): self.write(" " + "=" * self.level + "\n")
    def start_abstract(self, attrs): pass
    def end_abstract(self): self.write("\n\n")
    def start_para(self, attrs): pass
    def end_para(self): self.write("\n\n")

    # emphasis
    def start_emphasis(self, attrs): self.write("''")
    def end_emphasis(self): self.write("''")
    def start_term(self, attrs): self.write("'''")
    def end_term(self): self.write("'''")
    def start_firstterm(self, attrs): self.write("'''")
    def end_firstterm(self): self.write("'''")

    # code
    def start_programlisting(self, attrs):
        self.vt += 1
        self.write("\n{{{")

    def end_programlisting(self):
        self.vt -= 1
        self.write("}}}\n")

    def start_screen(self, attrs):
        self.vt += 1
        self.write("\n{{{")

    def end_screen(self):
        self.vt -= 1
        self.write("}}}\n")

    def start_markup(self, attrs): self.write("{{{")
    def end_markup(self): self.write("}}}")
    def start_command(self, attrs): self.write("{{{")
    def end_command(self): self.write("}}}")
    def start_option(self, attrs): self.write("{{{")
    def end_option(self): self.write("}}}")
    def start_filename(self, attrs): self.write("{{{")
    def end_filename(self): self.write("}}}")

    # links
    # A missing attribute yields an empty target instead of raising, so a
    # malformed element cannot abort the whole conversion.
    def start_ulink(self, attrs): self.write("[%s " % attrs.get('url', ''))
    def end_ulink(self): self.write("]")

    def start_mediaobject(self, attrs): pass
    def end_mediaobject(self): pass
    def start_imageobject(self, attrs): pass
    def end_imageobject(self): pass
    def start_imagedata(self, attrs): self.write("\nattachment:%s\n" % attrs.get('fileref', ''))
    def end_imagedata(self): self.write("\n")

    # misc
    def start_citetitle(self, attrs): pass
    def end_citetitle(self): pass
    def start_citerefentry(self, attrs): pass
    def end_citerefentry(self): pass
    def start_refentrytitle(self, attrs): self.write("'''")
    def end_refentrytitle(self): self.write("'''")
    def start_manvolnum(self, attrs): self.write("(")
    def end_manvolnum(self): self.write(")")

    def start_element(self, name, attrs):
        """expat callback: dispatch an opening tag to ``start_<name>``.

        Elements without a handler are written as ``<<<name:``.  Only a
        missing handler triggers that fallback; an exception raised inside
        a handler propagates instead of being disguised as an unknown
        element.
        """
        if name in self.ignorelist:
            self.ignore += 1
        if self.ignore:
            return None
        handler = getattr(self, 'start_' + name, None)
        if handler is None:
            self.write("<<<" + name + ":")
            return None
        return handler(attrs)

    def end_element(self, name):
        """expat callback: dispatch a closing tag to ``end_<name>``.

        Elements without a handler are written as ``:name>>>``.
        """
        if not self.ignore:
            handler = getattr(self, 'end_' + name, None)
            if handler is None:
                self.write(":" + name + ">>>")
            else:
                handler()
        # Decrement only after the check above so that the closing tag of an
        # ignored element is itself still suppressed.
        if name in self.ignorelist:
            self.ignore -= 1

    def char_data(self, data):
        """expat callback: write character data.

        Leading newlines that would merely repeat the newlines at the end
        of the previously written text are dropped.  Outside verbatim
        sections every run of whitespace is collapsed to a single space and
        one trailing space is appended.
        """
        if self.ignore:
            return
        if data.startswith("\n\n") and self.lastchars.endswith("\n\n"):
            data = data[2:]
        elif data.startswith("\n") and self.lastchars.endswith("\n"):
            data = data[1:]
        if not self.vt:
            data = ' '.join(data.strip().split()) + " "
        if data:
            self.write(data)
            # Keep the last two characters (or the single one available) so
            # the newline checks above can see how the previous text ended.
            self.lastchars = data[-2:]

    def default_handler(self, data):
        """expat callback for anything no other handler covers; discarded."""
        pass  # self.write('Default:', repr(data))

    def parse(self):
        """Read the input document, convert it and write the result.

        Reads ``infile`` (or sys.stdin) completely, then opens ``outfile``
        (or uses sys.stdout) and runs the parser.  Files opened here are
        closed again, also when parsing fails; sys.stdin and sys.stdout are
        left open.  Afterwards the parser is gone (see close()), so an
        instance can parse only once.
        """
        if self.infile:
            f = open(self.infile)
        else:
            f = sys.stdin
        # Read the input before touching the output file, so that an
        # unreadable input does not leave a truncated output behind.
        try:
            text = f.read()
        finally:
            if self.infile:
                f.close()

        if self.outfile:
            self.out = open(self.outfile, "w")
        else:
            self.out = sys.stdout
        try:
            text = re.sub(r'>\s*<', r'><', text)  # kill whitespace between tags
            self.feed(text)
            self.close()
        finally:
            if self.outfile:
                self.out.close()
            else:
                self.out.flush()

    def write(self, what):
        """Write the string *what* to the current output stream."""
        self.out.write(what)

# Convert the DocBook HOWTO in the current directory into a page file
# below the wiki's text directory.  This runs whenever the module is
# executed or imported.
wiki = "/org/org.linuxwiki/data/text/"
o = xml2wiki(
    infile="DocBook-Demystification-HOWTO.xml",
    outfile="%s%s" % (wiki, "DocBookDemystificationHowTo")
)
o.parse()

