import html
import re

try:
    # Not used below any more (cgi.escape was removed in Python 3.8).  The
    # import is retained only so the module-level name ``cgi`` stays bound for
    # any other code that relies on it; the module itself is gone in 3.13.
    import cgi
except ImportError:
    cgi = None

# One named alternative per construct that has to be rewritten.  Order matters:
# earlier alternatives win when several could match at the same position.
#   htmlchars - characters that are special in HTML text
#   space     - indentation (spaces/tabs) at the start of a line
#   lineend   - any newline convention; CRLF is tried first so it counts once
#   protocal  - an http/https/ftp URL (historical spelling kept on purpose)
# The URL alternative uses a lookbehind instead of consuming the whitespace
# around it, so that whitespace is still seen by the other alternatives and
# back-to-back or indented URLs are all recognised.
re_string = re.compile(
    r'(?P<htmlchars>[<&>])'
    r'|(?P<space>^[ \t]+)'
    r'|(?P<lineend>\r\n|\r|\n)'
    r'|(?P<protocal>(?<!\S)(?:https?|ftp)://\S*)',
    re.M | re.I,
)


def plaintext2html(text, tabstop=4):
    """Convert plain text into an HTML fragment.

    The following rewrites are applied in a single pass over *text*:

    * ``<``, ``&`` and ``>`` become the corresponding character entities.
    * Every line ending (CRLF, CR or LF) becomes ``<br>``.
    * Leading whitespace on a line becomes non-breaking spaces; each tab
      expands to *tabstop* of them.
    * An http, https or ftp URL that starts the text or follows whitespace
      becomes a link whose target and label are both the escaped URL.

    All other characters are copied through unchanged.

    Args:
        text: The plain text to convert.
        tabstop: Number of non-breaking spaces a leading tab expands to.

    Returns:
        The converted HTML as a string.
    """
    def do_sub(m):
        # Exactly one named group takes part in any match, and the pattern
        # has no other capturing groups, so lastgroup identifies the branch.
        kind = m.lastgroup
        matched = m.group()
        if kind == 'htmlchars':
            return html.escape(matched, quote=False)
        if kind == 'lineend':
            return '<br>'
        if kind == 'space':
            indent = matched.replace('\t', '&nbsp;' * tabstop)
            return indent.replace(' ', '&nbsp;')
        # Remaining case: a URL.  It can contain quotes, angle brackets and
        # ampersands, so escape it before placing it inside the attribute.
        url = html.escape(matched, quote=True)
        return '<a href="%s">%s</a>' % (url, url)

    return re_string.sub(do_sub, text)