import re
import sre

from django import http

class ZapDotHtmlMiddleware(object):
    """Redirect GET requests for ``*.htm`` / ``*.html`` URLs to the bare path.

    Removes trailing .htm and .html extensions from incoming URLs
    (GETs only) so that a legacy site ported to Django can continue to
    support existing bookmarks. Locate in settings.MIDDLEWARE_CLASSES near
    CommonMiddleware (similar middleware stack location requirements).
    """
    # Dave Rowell, Appropriate Solutions, Inc., www.appropriatesolutions.com

    def __init__(self):
        """Compile the extension-matching pattern once per middleware instance."""
        # Matches .htm or .html (any case) at the end of the path. The
        # lookahead allows one optional trailing slash but does not consume
        # it, so '/page.html/' becomes '/page/'.
        # Uses ``re``; ``sre`` is only its deprecated alias.
        self.re_trim_html = re.compile(r'\.html?(?=/?$)', re.IGNORECASE)

    def process_request(self, request):
        """Redirect to the extension-less URL when the path ends in .htm/.html.

        Returns an ``http.HttpResponsePermanentRedirect`` to the trimmed URL
        (query string preserved) for GET requests whose path was trimmed,
        and ``None`` otherwise so request processing continues normally.
        """
        if request.method != 'GET':
            return None

        # Excise any .htm/.html ending.
        new_path = self.re_trim_html.sub('', request.path)
        if new_path == request.path:
            # Nothing was trimmed; let the request through untouched.
            return None

        # URL was trimmed; redirect.
        # (Borrowed from django.middleware.common.CommonMiddleware.)
        host = http.get_host(request)
        if host:
            if request.is_secure():
                scheme = 'https'
            else:
                scheme = 'http'
            newurl = "%s://%s%s" % (scheme, host, new_path)
        else:
            # No host available: fall back to a host-relative redirect.
            # (The original referenced an undefined name here and raised
            # NameError.)
            newurl = new_path

        # Carry the original query string over to the redirect target.
        urlencode = request.GET.urlencode()
        if urlencode:
            newurl += '?' + urlencode
        return http.HttpResponsePermanentRedirect(newurl)
