Put it in appname/templatetags/truncatehtml.py and load it with {% load truncatehtml %}, then for instance {{ some_story|truncatehtml:100 }} to truncate the story to 100 characters.
Tags are not counted in the length given, and character entities count as one character.
The filter should never break an open-tag text close-tag sequence without adding in the close tag. It will also preserve character entities. It won't sanitize the HTML, though: garbage in, garbage out.
There's a bit more info about how it works in a blog post I wrote.
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 | from django import template
 
register = template.Library()
import re
tag_end_re = re.compile(r'(\w+)[^>]*>')
entity_end_re = re.compile(r'(\w+;)')
@register.filter
def truncatehtml(string, length, ellipsis='...'):
    """Truncate HTML string, preserving tag structure and character entities."""
    length = int(length)
    output_length = 0
    i = 0
    pending_close_tags = {}
    
    while output_length < length and i < len(string):
        c = string[i]
        if c == '<':
            # probably some kind of tag
            if i in pending_close_tags:
                # just pop and skip if it's closing tag we already knew about
                i += len(pending_close_tags.pop(i))
            else:
                # else maybe add tag
                i += 1
                match = tag_end_re.match(string[i:])
                if match:
                    tag = match.groups()[0]
                    i += match.end()
  
                    # save the end tag for possible later use if there is one
                    match = re.search(r'(</' + tag + '[^>]*>)', string[i:], re.IGNORECASE)
                    if match:
                        pending_close_tags[i + match.start()] = match.groups()[0]
                else:
                    output_length += 1 # some kind of garbage, but count it in
                    
        elif c == '&':
            # possible character entity, we need to skip it
            i += 1
            match = entity_end_re.match(string[i:])
            if match:
                i += match.end()
            # this is either a weird character or just '&', both count as 1
            output_length += 1
        else:
            # plain old characters
            
            skip_to = string.find('<', i, i + length)
            if skip_to == -1:
                skip_to = string.find('&', i, i + length)
            if skip_to == -1:
                skip_to = i + length
                
            # clamp
            delta = min(skip_to - i,
                        length - output_length,
                        len(string) - i)
            output_length += delta
            i += delta
                        
    output = [string[:i]]
    if output_length == length:
        output.append(ellipsis)
    for k in sorted(pending_close_tags.keys()):
        output.append(pending_close_tags[k])
    return "".join(output)
truncatehtml.is_safe = True
 | 
More like this
- Add Toggle Switch Widget to Django Forms by OgliariNatan 1 month, 3 weeks ago
- get_object_or_none by azwdevops 5 months, 2 weeks ago
- Mask sensitive data from logger by agusmakmun 7 months, 1 week ago
- Template tag - list punctuation for a list of items by shapiromatron 1 year, 9 months ago
- JSONRequestMiddleware adds a .json() method to your HttpRequests by cdcarter 1 year, 9 months ago
Comments
Better use of integrated tools Django.
#
Please login first before commenting.