1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49 | import re
from urlparse import urlsplit, urlunsplit
# DNS suffix appended to a host name to route the request through CoralCDN.
_coral_suffix = '.nyud.net'

# Matches one opening anchor tag that has a double-quoted href attribute.
#   prefix: everything from '<a' up to and including 'href="'
#   url:    the attribute value (cannot contain a double quote)
#   suffix: the closing quote and the rest of the tag up to '>'
# Every part is bounded by [^>] / [^"] so a match can never run past the end
# of its own tag or attribute; this lets several anchors on one line be
# rewritten independently.  '<a\b' keeps tags such as <abbr> from matching,
# and '\s' before 'href' keeps attributes such as data-href from matching.
_regex = r'(?P<prefix><a\b[^>]*?\shref=")(?P<url>[^"]*)(?P<suffix>"[^>]*>)'
_anchor_regex = re.compile(_regex, re.IGNORECASE)


def _coralize_netloc(netloc):
    """
    Return `netloc` with the Coral CDN suffix appended to its host name.

    Any 'user:password@' part and any ':port' part are preserved.  The
    value is returned unchanged when it has no host name, when the host is
    a bracketed IPv6 literal (a DNS suffix cannot be appended to it), or
    when the host already ends with the suffix.
    """
    userinfo, at, hostport = netloc.rpartition('@')
    if hostport.startswith('['):
        return netloc
    host, colon, port = hostport.partition(':')
    if not host or host.lower().endswith(_coral_suffix):
        return netloc
    return userinfo + at + host + _coral_suffix + colon + port


def _rewrite_anchor(match):
    """
    Replacement callback for `_anchor_regex.sub()`.

    Returns the matched anchor tag with its URL pointed at the Coral CDN,
    or the original tag text when the URL is a local reference, cannot be
    parsed, or does not need rewriting.
    """
    url = match.group('url')
    try:
        parts = urlsplit(url)
    except ValueError:
        # Malformed URL (e.g. unbalanced IPv6 brackets): leave the tag
        # alone rather than failing the whole response.
        return match.group()

    # Without a 'netloc' part this is a local reference, so there is
    # nothing to rewrite.
    if not parts.netloc:
        return match.group()

    netloc = _coralize_netloc(parts.netloc)
    if netloc == parts.netloc:
        return match.group()

    rewritten = urlunsplit(
        (parts.scheme, netloc, parts.path, parts.query, parts.fragment))
    return match.group('prefix') + rewritten + match.group('suffix')


class CoralCDNMiddleware(object):
    """
    This middleware rewrites anchor tags contained in the response
    content so that the pages are fetched through the Coral Content
    Distribution Network [http://coralcdn.org/].
    """

    def process_response(self, request, response):
        """
        Rewrite every absolute anchor URL in `response.content`.

        `request` is not used.  The same `response` object is returned
        after its `content` attribute has been replaced.
        """
        # NOTE(review): the pattern is a text pattern, so `response.content`
        # is assumed to be a text string here -- confirm for the framework
        # version in use.
        response.content = _anchor_regex.sub(_rewrite_anchor,
                                             response.content)
        return response
|
Comments