# Fetches the feed at +url+ and, when new content came back, parses it
# (RSS or Atom) into this object's metadata and item list.
#
# url           - address handed to Engine.get_url.
# last_modified - modification stamp from an earlier fetch (default nil);
#                 passed to Engine.get_url and compared with the stamp it
#                 returns to decide whether parsing is needed.
#
# Side effects: always stores the returned stamp in @last_modified. When the
# content is parsed, sets @title/@link/@desc (plus @image/@lang for RSS) for
# every element that is present, and replaces @items with freshly built
# Feed::Item objects.
#
# Raises RuntimeError when the fetch fails or when the fetched document has
# no root element. Errors raised by the XML parser itself propagate as-is.
def parse_rss_url(url, last_modified = nil)
  begin
    content, modified = Engine.get_url(url, last_modified)
    if $config['log_content']
      meth = 'Raggle::Channel#parse_rss_url'
      $log.debug(meth) {
        content_str = content ? content[0, 80] : '<nil>'
        "modified: #{modified}, content: #{content_str}"
      }
    end
  rescue => err
    raise "Couldn't get URL \"#{url}\": #{err}."
  end

  @last_modified = modified

  # Nothing to parse: no body came back, or the stamp is unchanged.
  return unless content && (!modified || modified != last_modified)

  if $config['strip_external_entities']
    # Non-mutating gsub: the fetched string object is left untouched (and may
    # be frozen), and the text is scanned once instead of twice.
    content = content.gsub(/<!ENTITY %.*?>/m, '')
  end

  doc = REXML::Document.new content
  root = doc.root
  # An empty or prolog-only body parses to a document without a root element;
  # fail with a feed-level message instead of a NoMethodError on nil.
  raise "Couldn't parse feed from \"#{url}\": no root element." unless root

  # A root element named "feed" marks the document as Atom; anything else is
  # handled as RSS.
  atom = (root.name == 'feed')

  # Each attribute is only overwritten when its element exists in the feed.
  if atom
    node = root.elements['//feed/title']
    @title = node.text if node
    node = root.elements['//feed/link']
    @link = node.attributes['href'] if node
    node = root.elements['//feed/tagline']
    @desc = node.text if node
  else
    node = root.elements['//channel/title']
    @title = node.text if node
    node = root.elements['//channel/link']
    @link = node.text if node
    node = root.elements['//channel/description']
    @desc = node.text if node
    node = root.elements['//image/url']
    @image = node.text if node
    node = root.elements['//channel/language']
    @lang = node.text if node
  end

  @items = []
  item_element_path = atom ? '//feed/entry' : '//item'
  root.elements.each(item_element_path) do |item|
    if atom
      h = parse_atom_entry(item)
    else
      h = {}
      # Direct-child lookup for the basic item fields ('' when absent).
      %w[title link date description].each do |field|
        child = item.elements[field]
        h[field] = child ? fix_character_encoding(child) : ''
      end
      # These three keys are then replaced by whatever find_element returns;
      # only 'title' keeps the value from the lookup above.
      %w[link date description].each do |field|
        h[field] = find_element(item, field)
      end
      # Enclosures are only processed when an enclosure hook is configured.
      if $config['enclosure_hook_cmd']
        item.elements.each('enclosure') do |enclosure_element|
          handle_rss_enclosure(h, enclosure_element)
        end
      end
    end
    @items << Feed::Item.new(h['title'], h['link'],
                             h['description'], h['date'])
  end
end