package com.jw.app.util;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;

import org.apache.commons.lang3.StringUtils;

import com.gaowj.service.client.utils.UrlDataUtil;
import com.sun.syndication.feed.synd.SyndCategoryImpl;
import com.sun.syndication.feed.synd.SyndEntry;
import com.sun.syndication.feed.synd.SyndFeed;
import com.sun.syndication.io.FeedException;
import com.sun.syndication.io.SyndFeedInput;
import com.sun.syndication.io.XmlReader;
/**
 * Builds the HTML shown by portlets whose content comes from an RSS feed or
 * from a fragment scraped out of a remote page.
 *
 * @author chenhong
 */
public class RssUrlData {
	/** Number of rows rendered into the home-page table; the "more" link follows the last one. */
	private static final int HOME_PAGE_ROWS = 10;

	/** Titles longer than this are cut and suffixed with "..." in the home-page table. */
	private static final int MAX_TITLE_LENGTH = 18;

	/** Matches a double-quoted href attribute; group 1 is the attribute value. */
	private static final Pattern HREF_PATTERN = Pattern.compile(
			"href=\"([^\"]*)\"", Pattern.CASE_INSENSITIVE);

	/**
	 * Matches an opening anchor tag; group 1 is the raw attribute text (possibly empty).
	 * The attribute text must start with whitespace so that tags such as
	 * {@code <abbr>} or {@code <area>} are not mistaken for anchors.
	 */
	private static final Pattern ANCHOR_PATTERN = Pattern.compile(
			"<a((?:\\s[^>]*)?)>", Pattern.CASE_INSENSITIVE);

	/** Detects an existing target attribute inside an anchor's attribute text. */
	private static final Pattern TARGET_PATTERN = Pattern.compile(
			"target=", Pattern.CASE_INSENSITIVE);

	/** Detects a leading URI scheme such as {@code https:}, {@code mailto:} or {@code javascript:}. */
	private static final Pattern SCHEME_PATTERN = Pattern.compile("^[a-zA-Z][a-zA-Z0-9+.-]*:");

	/** Matches any run of whitespace. */
	private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

	/**
	 * Parses the RSS feed referenced by the entity and stores two HTML tables in it:
	 * {@code TAKE_CONTENT} (at most ten rows with truncated titles, shown on the home
	 * page, followed by a "more" link once the tenth row is reached) and
	 * {@code RSS_CONTENT} (all rows with full titles, shown in the "more" view).
	 * <p>
	 * Nothing is done unless {@code P_TYPE} equals {@code "3"}. The keys read are
	 * {@code URL}, {@code IS_POP} ({@code "1"} = open links in a new page, anything
	 * else = open through {@code window.parent.openNewNoLock}) and {@code PORTLET_ID}.
	 * <p>
	 * The second column shows the entry author; when that is missing the published
	 * date is used, and failing that the name of the first category.
	 * <p>
	 * NOTE(review): the title text and the second column are inserted as raw HTML,
	 * exactly as before. If feeds can be untrusted this is an XSS vector; escaping
	 * was not added here because feeds may deliver titles that already contain
	 * entities - confirm with the callers.
	 *
	 * @param entity portlet row; modified in place
	 * @return the same {@code entity} instance
	 * @throws IOException if the feed cannot be fetched or read
	 * @throws IllegalArgumentException if the feed type cannot be determined by the parser
	 * @throws FeedException if the feed XML cannot be parsed
	 */
	public static Map<String, Object> rssContent(Map<String, Object> entity) throws IOException, IllegalArgumentException, FeedException {
		// StringUtils.equals is null-safe, so a missing P_TYPE simply fails the check.
		if (!StringUtils.equals((String) entity.get("P_TYPE"), "3")) {
			return entity;
		}
		String url = (String) entity.get("URL");
		boolean newPage = StringUtils.equals((String) entity.get("IS_POP"), "1");
		String portletId = (String) entity.get("PORTLET_ID");

		SyndFeed feed = fetchFeed(url);
		List<?> entries = feed.getEntries();

		// Created once per call: SimpleDateFormat is not thread-safe, so it is kept local.
		SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
		StringBuilder homeHtml = new StringBuilder("<table width=\"100%\">");
		StringBuilder fullHtml = new StringBuilder("<table width=\"100%\">");
		int homeRows = 0;
		for (Object item : entries) {
			SyndEntry entry = (SyndEntry) item;
			String fullTitle = nullToEmpty(entry.getTitle());
			String link = nullToEmpty(entry.getLink());
			String byline = resolveByline(entry, dateFormat);

			fullHtml.append(buildRow(newPage, "75%", link, fullTitle, fullTitle, byline));

			if (homeRows < HOME_PAGE_ROWS) {
				String shortTitle = fullTitle;
				if (shortTitle.length() > MAX_TITLE_LENGTH) {
					shortTitle = shortTitle.substring(0, MAX_TITLE_LENGTH) + "...";
				}
				homeHtml.append(buildRow(newPage, "65%", link, fullTitle, shortTitle, byline));
				homeRows++;
				if (homeRows == HOME_PAGE_ROWS) {
					// Last home-page row reached: add the link that opens the full list.
					homeHtml.append("<tr><td colspan=\"4\" align=\"right\"><a href=\"javascript:alertRSSGrid('")
							.append(portletId)
							.append("');\" style=\"color:#000;\">>>更多</a></td></tr>");
				}
			}
		}
		homeHtml.append("</table>");
		fullHtml.append("</table>");
		entity.put("TAKE_CONTENT", homeHtml.toString());
		entity.put("RSS_CONTENT", fullHtml.toString());
		return entity;
	}

	/**
	 * Fetches a page fragment and rewrites its links, storing the result under
	 * {@code TAKE_CONTENT}.
	 * <p>
	 * When {@code URL} is present the fragment is obtained from
	 * {@code UrlDataUtil.getUrlContent(URL, TAKE_START, TAKE_END, URL_ENCODED)}
	 * (presumably the part of the page between the two markers - verify against
	 * that helper). Relative {@code href="..."} values are then resolved against
	 * {@code URL}, and every anchor tag is either given {@code target="_blank"}
	 * ({@code IS_POP} equals {@code "1"}) or has its attributes replaced by a
	 * {@code javascript:window.parent.openNewNoLock(...)} href that carries the
	 * link and {@code TITLE}. Finally all whitespace runs are collapsed to a
	 * single space. When {@code URL} is absent, {@code TAKE_CONTENT} is set to
	 * the empty string.
	 *
	 * @param l_data portlet row; modified in place
	 * @return the same {@code l_data} instance
	 */
	public static Map<String, Object> urlContent(Map<String, Object> l_data) {
		String rendered = "";
		if (l_data.get("URL") != null) {
			String takeUrl = (String) l_data.get("URL");
			String takeStart = (String) l_data.get("TAKE_START");
			String takeEnd = (String) l_data.get("TAKE_END");
			String urlEncoded = (String) l_data.get("URL_ENCODED");
			boolean newPage = StringUtils.equals((String) l_data.get("IS_POP"), "1");
			String title = nullToEmpty((String) l_data.get("TITLE"));

			// Defensive: the helper is opaque from here, so tolerate a null result.
			String content = nullToEmpty(UrlDataUtil.getUrlContent(takeUrl, takeStart,
					takeEnd, urlEncoded));

			rendered = rewriteAnchors(absolutizeHrefs(content, takeUrl), newPage, title);
		}
		l_data.put("TAKE_CONTENT", WHITESPACE_RUN.matcher(rendered).replaceAll(" "));
		return l_data;
	}

	/** Downloads and parses the feed at {@code url}, transparently un-gzipping it, and always closes the stream. */
	private static SyndFeed fetchFeed(String url) throws IOException, IllegalArgumentException, FeedException {
		URLConnection conn = new URL(url).openConnection();
		String contentEncoding = conn.getHeaderField("Content-Encoding");
		InputStream in = conn.getInputStream();
		try {
			if (contentEncoding != null && contentEncoding.contains("gzip")) {
				in = new GZIPInputStream(in);
			}
			return new SyndFeedInput().build(new XmlReader(in));
		} finally {
			// Closing the outermost stream also closes the connection stream it wraps.
			in.close();
		}
	}

	/** Returns the text for a row's second column: author, else published date, else first category name, else "". */
	private static String resolveByline(SyndEntry entry, SimpleDateFormat dateFormat) {
		String author = entry.getAuthor();
		if (author != null && !author.equals("")) {
			return author;
		}
		Date published = entry.getPublishedDate();
		if (published != null) {
			return dateFormat.format(published);
		}
		List<?> categories = entry.getCategories();
		if (categories != null && !categories.isEmpty()) {
			return nullToEmpty(((SyndCategoryImpl) categories.get(0)).getName());
		}
		return "";
	}

	/**
	 * Renders one table row for a feed entry.
	 *
	 * @param newPage    true to link with {@code target="_blank"}, false to link through {@code openNewNoLock}
	 * @param width      width of the title cell, e.g. {@code "75%"}
	 * @param link       entry link
	 * @param fullTitle  untruncated title, passed to {@code openNewNoLock} as the window title
	 * @param shownTitle title text displayed in the row
	 * @param byline     text of the second column
	 */
	private static String buildRow(boolean newPage, String width, String link,
			String fullTitle, String shownTitle, String byline) {
		StringBuilder row = new StringBuilder(512);
		if (newPage) {// open in a new page
			row.append("<tr><td width=\"").append(width)
					.append("\" height=\"25\" style=\"border-bottom:dashed 1px #ccc;\"><a href='")
					.append(escapeSingleQuotedAttribute(link))
					.append("' target=\"_blank\" style=\"color:#000;\">");
		} else {// default: open in a popup
			row.append("<tr><td height=\"25\" width=\"").append(width)
					.append("\" style=\"border-bottom:dashed 1px #ccc;\"><a href=\"javascript:window.parent.openNewNoLock('")
					.append(escapeJsInAttribute(link))
					.append("','','")
					.append(escapeJsInAttribute(fullTitle))
					.append("',1024,500);\" style=\"color:#000;\">");
		}
		row.append("<image src=\"images/homepage/s_left.gif\"/><span style=\"padding-left:3px;\">")
				.append(shownTitle)
				.append("</span></a></td>")
				.append("<td style=\"border-bottom:dashed 1px #ccc;\"><span style=\"padding-left:10px;\">")
				.append(byline)
				.append("</span></td></tr>");
		return row.toString();
	}

	/**
	 * Resolves every relative {@code href="..."} in {@code html} against {@code pageUrl}.
	 * Each match is rewritten in place, so duplicate hrefs or hrefs that are
	 * prefixes of one another cannot be rewritten twice.
	 */
	private static String absolutizeHrefs(String html, String pageUrl) {
		URL base;
		try {
			base = new URL(pageUrl);
		} catch (MalformedURLException e) {
			// Without a usable base there is nothing to resolve against; keep the links as they are.
			return html;
		}
		Matcher m = HREF_PATTERN.matcher(html);
		StringBuffer out = new StringBuffer(html.length() + 64);
		while (m.find()) {
			// Keep the original 'href="' prefix and closing quote; only the value changes.
			String rewritten = html.substring(m.start(), m.start(1))
					+ toAbsolute(base, m.group(1))
					+ html.substring(m.end(1), m.end());
			m.appendReplacement(out, Matcher.quoteReplacement(rewritten));
		}
		m.appendTail(out);
		return out.toString();
	}

	/** Returns {@code href} resolved against {@code base}; empty, fragment-only and scheme-qualified hrefs are returned unchanged. */
	private static String toAbsolute(URL base, String href) {
		if (href.length() == 0 || href.charAt(0) == '#' || SCHEME_PATTERN.matcher(href).find()) {
			return href;
		}
		try {
			return new URL(base, href).toString();
		} catch (MalformedURLException e) {
			return href;
		}
	}

	/**
	 * Rewrites every anchor tag in {@code html}: adds {@code target="_blank"} when
	 * {@code newPage} is true and the tag has no target yet, otherwise replaces the
	 * tag's attributes with an {@code openNewNoLock} javascript href. Anchors
	 * without an href are left untouched in popup mode.
	 */
	private static String rewriteAnchors(String html, boolean newPage, String title) {
		Matcher anchor = ANCHOR_PATTERN.matcher(html);
		StringBuffer out = new StringBuffer(html.length() + 64);
		while (anchor.find()) {
			String attrs = anchor.group(1);
			String newAttrs = attrs;
			if (newPage) {// open in a new page
				if (!TARGET_PATTERN.matcher(attrs).find()) {
					newAttrs = attrs + " target=\"_blank\"";
				}
			} else {// default: open in a popup
				Matcher href = HREF_PATTERN.matcher(attrs);
				if (href.find()) {
					newAttrs = " href = \"javascript:window.parent.openNewNoLock('"
							+ escapeJsInAttribute(href.group(1)) + "','','"
							+ escapeJsInAttribute(title)
							+ "',915,500);\"";
				}
			}
			// Preserve the tag name exactly as written ("<a" or "<A").
			String rewritten = html.substring(anchor.start(), anchor.start(1)) + newAttrs + ">";
			anchor.appendReplacement(out, Matcher.quoteReplacement(rewritten));
		}
		anchor.appendTail(out);
		return out.toString();
	}

	/**
	 * Escapes a value placed inside a single-quoted JavaScript string that itself
	 * sits in a double-quoted HTML attribute: backslashes and single quotes are
	 * backslash-escaped, double quotes become {@code &quot;} and line breaks become spaces.
	 */
	private static String escapeJsInAttribute(String value) {
		StringBuilder out = new StringBuilder(value.length() + 8);
		for (int i = 0; i < value.length(); i++) {
			char c = value.charAt(i);
			switch (c) {
			case '\\':
				out.append("\\\\");
				break;
			case '\'':
				out.append("\\'");
				break;
			case '"':
				out.append("&quot;");
				break;
			case '\r':
			case '\n':
				out.append(' ');
				break;
			default:
				out.append(c);
			}
		}
		return out.toString();
	}

	/** Escapes single quotes so the value cannot terminate a single-quoted HTML attribute. */
	private static String escapeSingleQuotedAttribute(String value) {
		return value.replace("'", "&#39;");
	}

	/** Returns {@code value}, or the empty string when it is null. */
	private static String nullToEmpty(String value) {
		return value == null ? "" : value;
	}
}
