#!/usr/bin/env python

"""
Parser.py   $Id: Parser.py,v 1.15 2001/10/18 19:13:18 janssen Exp $


Copyright 1999,2000 by Holger Duerer <holly@starship.python.net>

Distributable under the GNU General Public License Version 2 or newer.
"""

from PyPlucker import TextParser, ImageParser, PluckerDocs


# Registry filled in by default_parser: maps each item reported by
# StructuredHTMLParser.get_unknown () to the list of URLs (as strings) of the
# HTML documents whose parse reported it.
unknown_things = {}


def default_parser (url, headers, data, config, maxwidth, maxheight, image_bits_per_pixel):
    """Parse a retrieved document according to its content type.

    Arguments:
      url      -- location of the document; converted with str () before use
      headers  -- mapping that must provide a 'content-type' entry
      data     -- raw document contents, handed to the selected parser
      config   -- configuration object, handed to the selected parser
      maxwidth, maxheight, image_bits_per_pixel
               -- only used for "image/*" documents, where they are passed
                  on to the image parser

    Returns a 2-tuple.  The first element is the Plucker document produced
    by the parser, or None if the content type is not handled or parsing
    failed.  The second element is the result of the image parser's
    scaled () call for images and 0 in every other case.

    Parsing errors are reported on standard output and turned into a
    (None, 0) result.  KeyboardInterrupt and SystemExit are not treated as
    parsing errors; they propagate to the caller so the user can still
    abort the run.

    For HTML documents, every item returned by the parser's get_unknown ()
    is recorded in the module-level unknown_things dictionary together
    with the URL it was found in.
    """
    import sys

    # The print calls below use a single parenthesised argument so that they
    # produce identical output as a Python 2 print statement and as a
    # Python 3 function call.
    try:
        url = str (url) # make sure we have a plain string, not a URL object
        content_type = headers['content-type']
        if content_type == "text/html":
            parser = TextParser.StructuredHTMLParser (url, data, headers, config)
            for item in parser.get_unknown ():
                try:
                    unknown_things[item].append (url)
                except KeyError:
                    # first time we see this item
                    unknown_things[item] = [url]
            return parser.get_plucker_doc (), 0
        elif content_type == "text/plain":
            parser = TextParser.PlainTextParser (url, data, headers, config)
            return parser.get_plucker_doc (), 0
        elif content_type == "mailto/text":
            # These are easy to handle, the document does it itself, so no
            # parsing needed as we generate the document directly
            return PluckerDocs.PluckerMailtoDocument (url), 0
        elif content_type[:6] == "image/":
            # this can fail, as some parsers do not recognize all image types...
            parser = ImageParser.get_default_parser(config)
            parsed = parser (url, content_type, data, config, maxwidth, maxheight, bpp=image_bits_per_pixel)
            return parsed.get_plucker_doc (), parsed.scaled ()
        else:
            print ("%s type not yet handled" % content_type)
            return None, 0
    except (KeyboardInterrupt, SystemExit):
        # Not parsing failures: without this clause the catch-all handler
        # below would swallow Ctrl-C and sys.exit () and carry on.
        raise
    except RuntimeError:
        # sys.exc_info () gives us the exception instance without relying on
        # version-specific "except ..., name" / "except ... as name" syntax.
        print ("Runtime error parsing document %s: %s" % (url, sys.exc_info ()[1]))
        return None, 0
    except AssertionError:
        print ("Assertion error parsing document %s: %s" % (url, sys.exc_info ()[1]))
        return None, 0
    except:
        # Deliberate best-effort: any other failure in a single document is
        # reported with a traceback but must not abort the whole run.
        import traceback
        print ("Unknown error parsing document %s:" % url)
        traceback.print_exc ()
        return None, 0
