微信

使用微信服务,更方便

职友集>Java面试题 > 分享:新浪RSS解析,比较老的一个话题啦

分享:新浪RSS解析,比较老的一个话题啦

2015-11-22 06:30:01 阅读( 140 )

3046人 收藏本页

标签:Java面试题

代码结构不复杂:Main.java 是主类;News.java 和 Outline.java 分别表示一条新闻对象和一个新闻频道对象;PublicUtils.java 是工具类,很简单,只有一个读取配置文件中新浪RSS地址的方法;Dom4jXmlParser.java 是核心类,根据工具类读取到的地址解析XML文档,获取各个频道的集合,然后迭代解析各个频道内的XML获取新闻的集合,并用News(新闻)和Outline(频道)封装。

[Java]代码

文件:resource_sina.properties
内容:url=http://rss.sina.com.cn/sina_all_opml.xml
作用:存放新浪RSS接口地址的配置文件

[Java]代码

package cn.outofmemory.code;

import java.net.URL;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import cn.tyz.domain.News;
import cn.tyz.domain.Outline;
import cn.tyz.utils.PublicUtils;
import cn.tyz.xml.Dom4jXmlParser;

/**
 * Command-line entry point: loads the channel index from the configured URL,
 * then prints every sub-channel and the news items of its feed to standard output.
 */
public class Main
{
    /**
     * Walks all channels returned by {@code Dom4jXmlParser.getLinkAsXmlData} and
     * prints each sub-channel together with its news items.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args)
    {
        try
        {
            // Fetch the channel name -> sub-channel list mapping and iterate over it
            Map<String, List<Outline>> map_Channels =
                    Dom4jXmlParser.getLinkAsXmlData(new URL(PublicUtils.getUrl()));

            for (Entry<String, List<Outline>> entry : map_Channels.entrySet())
            {
                // Channel (module) name
                System.out.println("模块名称:" + entry.getKey());

                // Sub-channels that belong to this channel
                for (Outline outline : entry.getValue())
                {
                    printSubChannel(outline);
                }
            }
        }
        catch (Exception e)
        {
            // Failure to load the index itself: nothing more can be printed
            e.printStackTrace();
        }
    }

    /**
     * Prints the header line of one sub-channel followed by all news items of its feed.
     * Any failure (malformed URL, unreachable or unparsable feed) is reported and
     * confined to this sub-channel so that the remaining ones are still processed.
     */
    private static void printSubChannel(Outline outline)
    {
        System.out.println(outline.getText() + ":" + outline.getXmlUrl() + "内容:\n");
        try
        {
            URL url = new URL(outline.getXmlUrl());
            // Fetch the news list of this sub-channel from its XML URL
            List<News> list_News = Dom4jXmlParser.getNewsAsXmlData(url);
            for (News news : list_News)
            {
                printNews(news);
            }
        }
        catch (Exception e)
        {
            // One broken feed must not abort the whole run
            e.printStackTrace();
        }
    }

    /** Prints the fields of a single news item, one per line, followed by a blank separator. */
    private static void printNews(News news)
    {
        System.out.println(trimOrEmpty(news.getTitle()));
        System.out.println(trimOrEmpty(news.getDescription()));
        System.out.println(trimOrEmpty(news.getPubDate()));
        System.out.println(trimOrEmpty(news.getLink()));
        System.out.println(trimOrEmpty(news.getCategory()));
        System.out.println(trimOrEmpty(news.getComments()));
        System.out.println("\n");
    }

    /**
     * Returns the trimmed value, or an empty string when the value is {@code null}
     * (a getter may return null, e.g. when a feed item lacks that element).
     */
    private static String trimOrEmpty(String value)
    {
        return value == null ? "" : value.trim();
    }
}

[Java]代码

package cn.tyz.utils;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

/**
 * Small helper for reading the application's configuration.
 */
public class PublicUtils
{
    /** Classpath name of the properties file holding the RSS index URL. */
    private static final String RESOURCE_NAME = "resource_sina.properties";

    /**
     * Reads the {@code url} property from {@code resource_sina.properties},
     * which is looked up through this class's class loader.
     *
     * @return the value of the {@code url} key, or {@code null} when the key is
     *         absent or the file could not be read
     * @throws IllegalStateException if the properties file is not found on the classpath
     */
    public static String getUrl()
    {
        InputStream in = PublicUtils.class.getClassLoader()
                .getResourceAsStream(RESOURCE_NAME);
        if (in == null)
        {
            // getResourceAsStream signals "not found" with null; fail with a clear
            // message instead of a NullPointerException inside Properties.load
            throw new IllegalStateException(
                    "Configuration file not found on classpath: " + RESOURCE_NAME);
        }

        Properties properties = new Properties();
        try
        {
            properties.load(in);
        }
        catch (IOException e)
        {
            // Best effort, as before: report the problem and fall through with
            // whatever was loaded (getProperty then yields null)
            e.printStackTrace();
        }
        finally
        {
            try
            {
                in.close();
            }
            catch (IOException ignored)
            {
                // Nothing useful can be done if closing a read-only stream fails
            }
        }

        return properties.getProperty("url");
    }
}

[Java]代码

package cn.tyz.domain;

/**
 * Mutable bean describing a single news item. All fields are plain strings
 * and start out as {@code null} until set.
 */
public class News {

    private String title;
    private String link;
    private String author;
    private String guid;
    private String category;
    private String pubDate;
    private String comments;
    private String description;

    /** @return the title, or {@code null} if not set */
    public String getTitle() { return this.title; }

    /** @param title the title to store */
    public void setTitle(String title) { this.title = title; }

    /** @return the link, or {@code null} if not set */
    public String getLink() { return this.link; }

    /** @param link the link to store */
    public void setLink(String link) { this.link = link; }

    /** @return the author, or {@code null} if not set */
    public String getAuthor() { return this.author; }

    /** @param author the author to store */
    public void setAuthor(String author) { this.author = author; }

    /** @return the guid, or {@code null} if not set */
    public String getGuid() { return this.guid; }

    /** @param guid the guid to store */
    public void setGuid(String guid) { this.guid = guid; }

    /** @return the category, or {@code null} if not set */
    public String getCategory() { return this.category; }

    /** @param category the category to store */
    public void setCategory(String category) { this.category = category; }

    /** @return the publication date text, or {@code null} if not set */
    public String getPubDate() { return this.pubDate; }

    /** @param pubDate the publication date text to store */
    public void setPubDate(String pubDate) { this.pubDate = pubDate; }

    /** @return the comments value, or {@code null} if not set */
    public String getComments() { return this.comments; }

    /** @param comments the comments value to store */
    public void setComments(String comments) { this.comments = comments; }

    /** @return the description, or {@code null} if not set */
    public String getDescription() { return this.description; }

    /** @param description the description to store */
    public void setDescription(String description) { this.description = description; }
}

[Java]代码

package cn.tyz.domain;

/**
 * Mutable bean describing one sub-channel entry of the channel index.
 * All fields are plain strings and start out as {@code null} until set.
 */
public class Outline {

    private String title;
    private String text;
    private String type;
    private String xmlUrl;
    private String htmlUrl;

    /** @return the title, or {@code null} if not set */
    public String getTitle() { return this.title; }

    /** @param title the title to store */
    public void setTitle(String title) { this.title = title; }

    /** @return the display text, or {@code null} if not set */
    public String getText() { return this.text; }

    /** @param text the display text to store */
    public void setText(String text) { this.text = text; }

    /** @return the type, or {@code null} if not set */
    public String getType() { return this.type; }

    /** @param type the type to store */
    public void setType(String type) { this.type = type; }

    /** @return the XML URL, or {@code null} if not set */
    public String getXmlUrl() { return this.xmlUrl; }

    /** @param xmlUrl the XML URL to store */
    public void setXmlUrl(String xmlUrl) { this.xmlUrl = xmlUrl; }

    /** @return the HTML URL, or {@code null} if not set */
    public String getHtmlUrl() { return this.htmlUrl; }

    /** @param htmlUrl the HTML URL to store */
    public void setHtmlUrl(String htmlUrl) { this.htmlUrl = htmlUrl; }
}

[Java]代码

package cn.tyz.xml;

import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

import cn.tyz.domain.News;
import cn.tyz.domain.Outline;

/**
 * dom4j-based parser for the channel index document and for the per-channel
 * news feeds it links to.
 */
public class Dom4jXmlParser
{
    /**
     * Parses the index document at {@code url} and collects, for every first-level
     * {@code outline} element under {@code body}, its {@code text} attribute
     * (the channel name) mapped to the list of its nested {@code outline}
     * elements (the sub-channels).
     *
     * @param url location of the index XML document
     * @return channel name to sub-channel list; empty when the document has no
     *         {@code body} element
     * @throws Exception if the document cannot be fetched or parsed
     */
    public static Map<String, List<Outline>> getLinkAsXmlData(URL url) throws Exception
    {
        Map<String, List<Outline>> map_Channels = new HashMap<String, List<Outline>>();

        // Load and parse the XML, then take the root element
        Document document = newHardenedReader().read(url);
        Element root = document.getRootElement();

        // A document without <body> has no channels; avoid a NullPointerException
        Element body = root.element("body");
        if (body == null)
        {
            return map_Channels;
        }

        // First-level outline elements are the channels
        List<Element> ele_Channels = body.elements("outline");
        for (Element ele_Channel : ele_Channels)
        {
            String channelName = ele_Channel.attributeValue("text");

            // Nested outline elements are the sub-channels of this channel
            List<Element> ele_Items = ele_Channel.elements("outline");
            List<Outline> list_Items = new ArrayList<Outline>();
            for (Element ele_Item : ele_Items)
            {
                list_Items.add(toOutline(ele_Item));
            }

            map_Channels.put(channelName, list_Items);
        }

        return map_Channels;
    }

    /**
     * Parses the feed at {@code url} and returns one {@code News} per
     * {@code item} element found under the root's {@code channel} element.
     *
     * @param url location of a single sub-channel's XML feed
     * @return the news items in document order; empty when the document has no
     *         {@code channel} element
     * @throws Exception if the document cannot be fetched or parsed
     */
    public static List<News> getNewsAsXmlData(URL url) throws Exception
    {
        List<News> list_News = new ArrayList<News>();

        // Load and parse the XML, then take the root element
        Document document = newHardenedReader().read(url);
        Element root = document.getRootElement();

        // A document without <channel> has no items; avoid a NullPointerException
        Element channel = root.element("channel");
        if (channel == null)
        {
            return list_News;
        }

        List<Element> ele_Items = channel.elements("item");
        for (Element ele_Item : ele_Items)
        {
            list_News.add(toNews(ele_Item));
        }

        return list_News;
    }

    /**
     * Creates a SAXReader that neither resolves external entities nor loads
     * external DTDs, because the parsed documents come from remote URLs (XXE hardening).
     */
    private static SAXReader newHardenedReader() throws Exception
    {
        SAXReader reader = new SAXReader();
        reader.setFeature("http://xml.org/sax/features/external-general-entities", false);
        reader.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
        reader.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
        return reader;
    }

    /** Copies the attributes of a sub-channel {@code outline} element into an Outline bean. */
    private static Outline toOutline(Element ele_Item)
    {
        Outline outline = new Outline();
        outline.setTitle(ele_Item.attributeValue("title"));
        outline.setText(ele_Item.attributeValue("text"));
        outline.setType(ele_Item.attributeValue("type"));
        outline.setXmlUrl(ele_Item.attributeValue("xmlUrl"));
        outline.setHtmlUrl(ele_Item.attributeValue("htmlUrl"));
        return outline;
    }

    /** Copies the child-element texts of an {@code item} element into a News bean. */
    private static News toNews(Element ele_Item)
    {
        News news = new News();
        news.setTitle(ele_Item.elementText("title"));
        news.setLink(ele_Item.elementText("link"));
        news.setAuthor(ele_Item.elementText("author"));
        news.setGuid(ele_Item.elementText("guid"));
        news.setCategory(ele_Item.elementText("category"));
        news.setPubDate(ele_Item.elementText("pubDate"));
        news.setComments(ele_Item.elementText("comments"));
        news.setDescription(ele_Item.elementText("description"));
        return news;
    }
}

下一篇:怎么使用java获得当前时间

上一篇:java连接SqlServer数据库调用存储过程代码

亲~ 如果您有更好的答案 可在评论区发表您独到的见解。

您想查看更多的信息: 面试题