Changesets can be listed by changeset number.
The Git repository is here.
- Revision:
- 451
- Log:
Tighten up filtering of Textile and HTML markup in forum posts, given
that spammers now sometimes get in and make posts. Use the mechanism
therein to fix Ticket #354, albeit at some significant speed penalty;
this occurs only when new posts are created or old posts are edited
and isn't too large compared with the wider request activity, for
typical length posts.
- Author:
- rool
- Date:
- Wed Sep 04 09:21:32 +0100 2013
- Size:
- 3626 Bytes
# View helper that sanitises user-supplied HTML against configurable white
# lists of tags, attributes and URL protocols.
#
# Four lists are kept as Sets in module-level class variables: bad_tags, tags,
# attributes and protocols. Each one gets:
#
#   WhiteListHelper.<list> = [...]   module-level writer (wraps value in a Set)
#   WhiteListHelper.<list>           reader generated by mattr_reader
#   white_listed_<list>              instance-level accessor used by the helper
module WhiteListHelper
  # Attributes whose values are URLs and are therefore protocol-checked.
  PROTOCOL_ATTRIBUTES = Set.new %w(src href)

  # Matches whatever separates a protocol from the rest of a URL: a literal
  # colon, the text "&#58" (any number of leading zeros), the text "&#x70",
  # or "3A" preceded by "%" or by the text "&#37;". The entity forms must be
  # spelled out literally in this pattern; if they are decoded to plain
  # characters (e.g. "&#x70" becoming "p") ordinary URLs such as "http://..."
  # are split in the wrong place and rejected.
  PROTOCOL_SEPARATOR = /:|(&#0*58)|(&#x70)|(%|&#37;)3A/

  [:bad_tags, :tags, :attributes, :protocols].each do |attr|
    # Writer lives on the module's singleton class so it is called as
    # WhiteListHelper.tags = [...]; the value is always stored as a Set.
    klass = class << self; self; end
    klass.send(:define_method, "#{attr}=") { |value| class_variable_set("@@#{attr}", Set.new(value)) }
    # Instance-level accessor that always reads the shared module-level list.
    define_method("white_listed_#{attr}") { ::WhiteListHelper.send(attr) }
    mattr_reader attr
  end

  # This White Listing helper will html encode all tags and strip all attributes that aren't specifically allowed.
  # It also strips href/src tags with invalid protocols, like javascript: especially. It does its best to counter any
  # tricks that hackers may use, like throwing in unicode/ascii/hex values to get past the javascript: filters. Check out
  # the extensive test suite.
  #
  #   <%= white_list @article.body %>
  #
  # You can add or remove tags/attributes if you want to customize it a bit.
  #
  # Add table tags
  #
  #   WhiteListHelper.tags.merge %w(table td th)
  #
  # Remove tags
  #
  #   WhiteListHelper.tags.delete 'div'
  #
  # Change allowed attributes
  #
  #   WhiteListHelper.attributes.merge %w(id class style)
  #
  # 2013-09-04 (ADH): If the attributes list is empty, any attribute is allowed, except for "bad protocol" data which is still stripped.
  #
  # white_list accepts a block for custom tag escaping. Shown below is the default block that white_list uses if none is given.
  # The block is called for all bad tags, and every text node. node is an instance of HTML::Node (either HTML::Tag or HTML::Text).
  # bad is nil for text nodes inside good tags, or is the tag name of the bad tag.
  #
  #   <%= white_list(@article.body) { |node, bad| white_listed_bad_tags.include?(bad) ? nil : node.to_s.gsub(/</, '&lt;') } %>
  #
  # Parameters:
  #
  #   html    - the markup to filter; returned untouched when blank or when it
  #             contains no '<' character.
  #   options - :tags and :attributes may each supply extra names to allow for
  #             this call only; they are merged with the module-level lists.
  #   block   - optional replacement for the default escaping block above.
  #
  # Returns the filtered markup as a String.
  #
  def white_list(html, options = {}, &block)
    return html if html.blank? || !html.include?('<')

    attrs = Set.new(options[:attributes]).merge(white_listed_attributes)
    tags  = Set.new(options[:tags]).merge(white_listed_tags)

    # Default handling: drop anything belonging to a "bad" tag (the block
    # returns nil), otherwise entity-escape '<' so the markup is shown as
    # text rather than interpreted.
    block ||= lambda { |node, bad| white_listed_bad_tags.include?(bad) ? nil : node.to_s.gsub(/</, '&lt;') }

    new_text  = []
    tokenizer = HTML::Tokenizer.new(html)

    # Name of the most recent disallowed tag, or nil once an allowed tag has
    # been seen; passed to the block so that it can treat following text
    # nodes accordingly.
    bad = nil

    while token = tokenizer.next
      node = HTML::Node.parse(nil, 0, 0, token, false)
      new_text << case node
        when HTML::Tag
          if tags.include?(node.name)
            bad = nil
            if node.closing != :close
              # Always filter, even when the attribute white list is empty:
              # an empty list means "allow any attribute name", but URLs
              # with a non-white-listed protocol must still be removed.
              node.attributes.delete_if do |attr_name, value|
                (!attrs.empty? && !attrs.include?(attr_name)) ||
                  (PROTOCOL_ATTRIBUTES.include?(attr_name) && contains_bad_protocols?(value))
              end
            end
            node
          else
            bad = node.name
            block.call node, bad
          end
        else
          block.call node, bad
      end
    end

    new_text.join
  end

  protected

    # Truthy when value contains a protocol separator and the text before the
    # first separator is not one of the white listed protocols. Values with
    # no separator at all (e.g. relative URLs) are never reported as bad.
    def contains_bad_protocols?(value)
      value =~ PROTOCOL_SEPARATOR && !white_listed_protocols.include?(value.split(PROTOCOL_SEPARATOR).first)
    end
end
77 | |
# Default white lists. Each assignment goes through the module-level writer,
# which stores the given names as a Set.

# Tags listed here are matched by the default white_list block, which then
# returns nil for them instead of escaped text.
WhiteListHelper.bad_tags = %w(script)

# Tags that are passed through (subject to attribute filtering).
WhiteListHelper.tags = %w(
  strong em b i p code pre tt output samp kbd var sub sup dfn cite big small
  address hr br div span h1 h2 h3 h4 h5 h6 ul ol li dt dd abbr acronym a img
  blockquote del ins fieldset legend
)

# Attribute names that survive on allowed tags.
WhiteListHelper.attributes = %w(href src width height alt cite datetime title class)

# URL protocols accepted in href/src values.
WhiteListHelper.protocols = %w(ed2k ftp http https irc mailto news gopher nntp telnet webcal xmpp callto feed)