package com.gaowj.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.security.SecureRandom;
import java.security.cert.CertificateException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import javax.security.cert.X509Certificate;

import org.apache.commons.lang.StringUtils;

import com.gaowj.business.util.AssertUtil;

/**
 * Static helpers for downloading remote content through {@link HttpURLConnection}.
 *
 * <p>Two families of methods are offered:
 * <ul>
 * <li>{@code readContentFromGet*} – read a response line by line, strip a fixed-size
 * wrapper from every line and parse the remainder as JSON via
 * {@code AssertUtil.fromJsonString};</li>
 * <li>{@code getUrlContent} – return the raw response text, optionally cut down to the
 * part between two marker strings.</li>
 * </ul>
 */
public class UrlDataUtil {

	/**
	 * Number of leading characters removed from each response line before it is
	 * parsed as JSON. NOTE(review): presumably the length of a JSONP-style
	 * prefix emitted by the remote service - confirm against the producer.
	 */
	private static final int JSON_PREFIX_LENGTH = 9;

	/** Number of trailing characters removed from each response line before parsing. */
	private static final int JSON_SUFFIX_LENGTH = 1;

	/** Sentinel returned by {@code getUrlContent} when the URL cannot be parsed. */
	private static final String URL_ERROR = "url_error";

	/** Sentinel returned by {@code getUrlContent} when connecting or reading fails. */
	private static final String READ_ERROR = "read_error";

	/**
	 * Fetches {@code url} with an HTTP GET and converts every response line into
	 * JSON records.
	 *
	 * @param url address to fetch; must resolve to an HTTP(S) connection
	 * @return all records parsed from the response, or {@code null} as soon as a
	 *         line whose unwrapped payload is exactly {@code []} is encountered
	 * @throws IOException if the URL is malformed, the connection or read fails,
	 *         or a non-blank line is too short to contain the expected wrapper
	 * @throws Exception whatever {@code AssertUtil.fromJsonString} propagates
	 */
	public static List<Map<String, Object>> readContentFromGet(String url)
			throws IOException, Exception {
		return fetchJsonRecords(url);
	}

	/**
	 * Same as {@link #readContentFromGet(String)}, but first installs an SSL
	 * socket factory that accepts every certificate chain.
	 *
	 * <p><b>NOTE(security):</b> the trust-all factory is installed with
	 * {@link HttpsURLConnection#setDefaultSSLSocketFactory}, i.e. for every
	 * HTTPS connection subsequently opened in this JVM, not only for this
	 * request. This disables certificate validation process-wide and permits
	 * man-in-the-middle attacks. The behaviour is kept unchanged because other
	 * code may rely on it; prefer scoping the factory to the single connection
	 * (or trusting a specific certificate) once callers have been audited.
	 *
	 * @param url address to fetch
	 * @return see {@link #readContentFromGet(String)}
	 * @throws IOException see {@link #readContentFromGet(String)}
	 * @throws Exception if the SSL context cannot be created or initialised, or
	 *         whatever {@code AssertUtil.fromJsonString} propagates
	 */
	public static List<Map<String, Object>> readContentFromGetHttps(String url)
			throws IOException, Exception {

		// Trust manager that performs no validation of certificate chains.
		TrustManager[] trustAllCerts = new TrustManager[] {
			new X509TrustManager() {
				@Override
				public java.security.cert.X509Certificate[] getAcceptedIssuers() {
					// The interface contract requires a non-null array.
					return new java.security.cert.X509Certificate[0];
				}

				@Override
				public void checkClientTrusted(
						java.security.cert.X509Certificate[] chain,
						String authType) throws CertificateException {
					// Intentionally empty: every client chain is accepted.
				}

				@Override
				public void checkServerTrusted(
						java.security.cert.X509Certificate[] chain,
						String authType) throws CertificateException {
					// Intentionally empty: every server chain is accepted.
				}
			}
		};

		// Install the all-trusting trust manager as the JVM-wide default.
		SSLContext sc = SSLContext.getInstance("TLS");
		sc.init(null, trustAllCerts, new SecureRandom());
		HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());

		return fetchJsonRecords(url);
	}

	/**
	 * Downloads {@code urlPath} and returns the part of the response delimited
	 * by the given markers.
	 *
	 * <p>Line terminators of the response are dropped, i.e. all lines are
	 * concatenated directly. If {@code take_start} is non-empty and occurs in
	 * the text, everything before its first occurrence is removed. If
	 * {@code take_end} is non-empty and occurs in the (already shortened) text,
	 * everything after its first occurrence is removed; the marker itself is
	 * kept. A marker that is not found leaves the text untouched.
	 *
	 * @param urlPath address to fetch
	 * @param take_start start marker, may be {@code null} or empty
	 * @param take_end end marker, may be {@code null} or empty
	 * @param encoded name of the charset used to decode the response
	 * @return the (possibly trimmed) response text, {@code "url_error"} if the
	 *         URL is malformed, or {@code "read_error"} if connecting or
	 *         reading fails (including an unsupported charset name)
	 */
	public static String getUrlContent(String urlPath, String take_start, String take_end, String encoded) {
		String content;
		try {
			content = readUrlBody(urlPath, encoded);
		} catch (MalformedURLException e) {
			// Best-effort API: failures are reported through sentinel strings.
			return URL_ERROR;
		} catch (IOException e) {
			return READ_ERROR;
		}

		// Trimming is only reached for successfully fetched content; the error
		// sentinels are returned above without ever being compared to the body.
		if (StringUtils.isNotEmpty(take_start)) {
			int start = content.indexOf(take_start);
			if (start > -1) {
				content = content.substring(start);
			}
		}
		if (StringUtils.isNotEmpty(take_end)) {
			int end = content.indexOf(take_end);
			if (end > -1) {
				content = content.substring(0, end + take_end.length());
			}
		}
		return content;
	}

	/**
	 * Downloads {@code urlPath} and returns the whole response with all lines
	 * concatenated (line terminators are dropped).
	 *
	 * @param urlPath address to fetch
	 * @param encoded name of the charset used to decode the response
	 * @return the response text, {@code "url_error"} if the URL is malformed, or
	 *         {@code "read_error"} if connecting or reading fails (including an
	 *         unsupported charset name)
	 */
	public static String getUrlContent(String urlPath, String encoded) {
		try {
			return readUrlBody(urlPath, encoded);
		} catch (MalformedURLException e) {
			// Best-effort API: failures are reported through sentinel strings.
			return URL_ERROR;
		} catch (IOException e) {
			return READ_ERROR;
		}
	}

	/**
	 * Re-decodes a string whose bytes were wrongly interpreted as ISO-8859-1.
	 *
	 * @param inString text to repair; {@code null} or blank yields {@code ""}
	 * @param encoded name of the charset the original bytes were really in
	 * @return the re-decoded text, or {@code ""} if the input is blank or the
	 *         charset name is not supported
	 */
	public static String getCN_ZH(String inString, String encoded) {
		String outString = "";
		if (inString != null && inString.trim().length() > 0) {
			try {
				outString = new String(inString.getBytes("iso8859_1"), encoded);
			} catch (UnsupportedEncodingException e) {
				// Unknown charset name: report it and fall back to the empty string.
				e.printStackTrace();
			}
		}
		return outString;
	}

	/**
	 * Manual smoke test: fetches a page, extracts one table and prints it with
	 * runs of whitespace collapsed to a single space.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String temp = getUrlContent("http://data.haishu.gov.cn/hs_main1","<table width=\"98%\" border=\"0\" cellpadding=\"0\" cellspacing=\"0\"									class=\"black_14_24\">","</table>","utf-8");
		temp = temp.replaceAll("\\s+"," ");
		System.out.println(temp);
	}

	/**
	 * Shared implementation of the JSON readers: GETs {@code url}, unwraps each
	 * response line and collects the parsed records. The reader and the
	 * connection are released on every exit path.
	 */
	private static List<Map<String, Object>> fetchJsonRecords(String url)
			throws IOException, Exception {
		URL getUrl = new URL(url);

		// URL.openConnection() returns a URLConnection subclass matching the
		// protocol; for http/https URLs this is an HttpURLConnection.
		HttpURLConnection connection = (HttpURLConnection) getUrl.openConnection();
		BufferedReader reader = null;
		try {
			// Establish the connection to the server.
			connection.connect();

			// The response is decoded with the platform default charset.
			reader = new BufferedReader(new InputStreamReader(
					connection.getInputStream()));

			List<Map<String, Object>> list = new ArrayList<Map<String, Object>>();
			String line;
			while ((line = reader.readLine()) != null) {
				if (line.trim().length() == 0) {
					// Blank lines carry no payload.
					continue;
				}
				if (line.length() < JSON_PREFIX_LENGTH + JSON_SUFFIX_LENGTH) {
					throw new IOException(
							"Response line too short to contain a wrapped JSON payload: " + line);
				}
				// Cut the wrapper away so that only the JSON text remains.
				String json = line.substring(JSON_PREFIX_LENGTH,
						line.length() - JSON_SUFFIX_LENGTH);
				if ("[]".equals(json)) {
					// An empty array is reported to callers as null.
					return null;
				}
				// Convert the JSON text into records and collect them.
				list.addAll(AssertUtil.fromJsonString(json));
			}
			return list;
		} finally {
			closeQuietly(reader);
			connection.disconnect();
		}
	}

	/**
	 * Shared implementation of {@code getUrlContent}: GETs {@code urlPath} and
	 * returns all response lines concatenated without separators. The reader
	 * and the connection are released on every exit path.
	 */
	private static String readUrlBody(String urlPath, String encoded) throws IOException {
		URL getUrl = new URL(urlPath);

		// URL.openConnection() returns a URLConnection subclass matching the
		// protocol; for http/https URLs this is an HttpURLConnection.
		HttpURLConnection connection = (HttpURLConnection) getUrl.openConnection();
		BufferedReader reader = null;
		try {
			// Establish the connection to the server.
			connection.connect();

			reader = new BufferedReader(new InputStreamReader(
					connection.getInputStream(), encoded));

			StringBuilder body = new StringBuilder();
			String line;
			while ((line = reader.readLine()) != null) {
				body.append(line);
			}
			return body.toString();
		} finally {
			closeQuietly(reader);
			connection.disconnect();
		}
	}

	/** Closes {@code reader} if it is non-null, ignoring any failure to close. */
	private static void closeQuietly(BufferedReader reader) {
		if (reader == null) {
			return;
		}
		try {
			reader.close();
		} catch (IOException ignored) {
			// Nothing useful can be done if closing a read-only stream fails.
		}
	}
}
