Changesets can be listed by changeset number.
The Git repository is here.
- Revision:
- 479
- Log:
Escape "[" and "]" in news feed parsed output, as they were being treated
like Textile data by the CMS and therefore rendered incorrectly.
- Author:
- rool
- Date:
- Mon Sep 09 20:55:23 +0100 2019
- Size:
- 6466 Bytes
# See "news_tag_extension.rb" for details.

require 'uri'
require 'net/https'

# Radiant tag module providing <r:news>, which fetches an RSS or Atom feed
# and renders its most recent items as an HTML unordered list of headlines.
module NewsTag
  include Radiant::Taggable

  desc %{
    Supply the tag with a "feed" parameter, which provides a
    fully qualified URL pointing to an XML RSS feed. The feed is
    parsed and a 'latest news' summary generated from it. Example:

    <pre><code>
    <r:news feed="http://my.url/news.xml" headlines="4" dates="0" />
    </code></pre>

    A 'headlines' attribute is optional; it defines how many entries
    will be included in the news summary and defaults to '4'.

    A 'dates' attribute is also optional; it says whether or not
    article published or modified dates (if found) will be added in
    small text after each headline. If '0' there are no dates, else
    dates are shown. The default value is '1', to show dates. Dates are
    extracted from the feed's "pubDate", "modified" or "dc_date" fields,
    in that order.

    An optional 'https' attribute defaults to '0'. If '1', HTTP URLs
    are upgraded to HTTPS if request.ssl? is true, else they are left
    alone.

    An optional 'escaped' attribute (not to be confused with 'escape',
    below!) states whether or not the feed URIs themselves are already
    escaped for including in links (i.e. there is use of "%xx" escape
    sequences where "xx" is a two digit hex number). Defaults to zero;
    URI.escape() will be run on the links. If set to a non-zero value,
    URI.escape() will not be called though "~" characters will still
    be substituted with '%7E'.

    Finally, an optional 'escape' attribute, defaulting to '1', ensures
    that RSS titles or links cannot be accidentally interpreted as
    Textile data for Textile filtered parts. Setting the attribute to
    '0' disables escaping to allow headlines marked up in Textile to be
    passed through to the Textile parser.

    Note that '<' and '>' characters in RSS item titles will always be
    escaped to HTML entities for security.

    If you want to use HTTPS fetches for feeds and your HTTPS server
    requires a certificate chain, you'll need to ensure that Radiant
    runs with environment variable SSL_CERT_CHAIN pointing to the
    full file path of the relevant ".crt" bundle. Otherwise you will
    see SSL errors thrown by Ruby instead of your parsed XML data.
  }
  tag "news" do |tag|

    # Read the tag attributes. The numeric ones are treated as flags or
    # counts; anything that is not a number becomes 0 via to_i.

    feed      = tag.attr['feed']
    dates     = (tag.attr['dates']     || '1').to_i
    to_https  = (tag.attr['https']     || '0').to_i
    escape    = (tag.attr['escape']    || '1').to_i
    escaped   = (tag.attr['escaped']   || '0').to_i
    headlines = (tag.attr['headlines'] || '4').to_i

    raise TagError.new("No feed URL given in `news' tag") if feed.nil? || feed.empty?

    # Fetch the feed and parse it. HTTPS feeds are fetched by hand so that
    # the SSL options can be set; anything else is handed straight to the
    # RSS parser as a URL.

    feed_uri = URI.parse(feed)
    rss = if feed_uri.scheme == 'https'
      cert_chain = ENV['SSL_CERT_CHAIN']
      https      = Net::HTTP.new(feed_uri.host, feed_uri.port)

      https.use_ssl = true

      # FIXME: certificate verification is switched off, so the feed host
      # is not authenticated. Moving to OpenSSL::SSL::VERIFY_PEER needs a
      # working CA bundle (see SSL_CERT_CHAIN) on every deployment.
      https.verify_mode = OpenSSL::SSL::VERIFY_NONE
      https.ca_file     = cert_chain unless cert_chain.nil? || cert_chain.empty?

      feed_data = https.start do |http|
        response = http.request(Net::HTTP::Get.new(feed_uri.request_uri))
        status   = response.code.to_i

        # Only 2xx responses carry a usable feed body.
        raise "#{ response.code }: #{ response.message }" unless status >= 200 && status <= 299

        response.body
      end

      RSS::Parser.parse(feed_data)
    else
      RSS::Parser.parse(feed)
    end

    # Fail with a meaningful tag error rather than a NoMethodError on nil
    # if the parser did not produce a feed object.

    raise TagError.new("Could not parse the feed given in `news' tag") if rss.nil?

    # When the output lands in a Textile filtered part, wrap the whole list
    # in <notextile> so that Textile leaves it alone (unless disabled).

    wrap_notextile = (escape != 0 && @filter == 'Textile')

    done = 0
    out  = "<ul>\n"
    out  = '<notextile>' + out if wrap_notextile

    # Loop through all items in the feed.

    rss.items.each do |item|

      # The item must have at least a title. The respond_to? check is to
      # cope with normal (string) versus Atom (structure) feed behaviour;
      # a missing (nil) title is skipped like an empty one.

      raw_title = item.title
      title = if raw_title.nil? || raw_title.respond_to?(:empty?)
        raw_title
      else
        raw_title.content
      end

      next if title.blank?

      # If we've got a title, increase the headline count and bail if
      # the limit has been exceeded.

      done += 1
      break if done > headlines

      # Add HTML for this item to the output string.

      out << ' <li>'

      # Ensure the title string doesn't contain unsafe characters -
      # RSS feeds can be used maliciously and square brackets can
      # confuse the CMS sometimes. Non-mutating gsub is used so the
      # parsed feed's own string is left untouched.

      title = title.gsub('<', '&lt;').
                    gsub('>', '&gt;').
                    gsub('[', '&#91;').
                    gsub(']', '&#93;')

      # Markdown doesn't process text here anyway, possibly because
      # the HTML list markup seems to stop it from doing so. Don't
      # escape Markdown for now - the code below has been tested and
      # does work though, so it can be introduced later if need be.
      #
      #if (escape != 0 and @filter == 'Markdown')
      #  title.gsub!(/([`*_{}\[\]()#.!])/) { '\\' + $& }
      #end

      # Insert link HTML if a link is present, escaping it and
      # manually converting "~" characters to the "%7E" equivalent.
      # As with titles, Atom links are structures and may be absent.

      raw_link = item.link
      link = if raw_link.nil? || raw_link.respond_to?(:empty?)
        raw_link
      else
        raw_link.href
      end

      if link.blank?
        out << title
      else
        if to_https != 0 && request.ssl?
          link_uri        = URI.parse(link)
          link_uri.scheme = 'https' if link_uri.scheme == 'http'
          link            = link_uri.to_s
        end

        # NOTE: URI.escape is obsolete and was removed in Ruby 3.0; it is
        # kept here because this code targets the older Ruby that Radiant
        # runs on.
        link = URI.escape(link) if escaped == 0
        link = link.gsub('~', '%7E')

        out << "<a href=\"#{link}\">#{title}</a>"
      end

      # Attempt to extract an item publication/modification date.

      time = if item.respond_to?(:updated)
        # Atom feeds - the element is optional, so guard against nil
        updated = item.updated
        updated.nil? ? nil : updated.content
      elsif item.respond_to?(:pubDate)
        # Typo blogs, The Register
        item.pubDate
      elsif item.respond_to?(:modified)
        # RForum installations, generic
        item.modified
      elsif item.respond_to?(:dc_date)
        # SlashDot
        item.dc_date
      end

      # Add the date if found and if attributes say to do so, then
      # close the list item.

      out << time.strftime(' <small>(%d-%b-%Y)</small>') if time.is_a?(Time) && dates != 0
      out << "</li>\n"
    end

    # Close the list, handle Textile escaping if necessary and
    # return the final chunk of data.

    out << "</ul>\n"
    out << '</notextile>' if wrap_notextile

    out

  end # 'tag "news" do |tag|'
end