1. <rp id="c4hsd"></rp>
          1. <button id="c4hsd"><acronym id="c4hsd"><input id="c4hsd"></input></acronym></button>
          2. <tbody id="c4hsd"></tbody>

            <dd id="c4hsd"><pre id="c4hsd"></pre></dd>

              1. <th id="c4hsd"></th>
              2. <s id="c4hsd"><object id="c4hsd"></object></s>

                    [ 登錄 ] - [ 注冊 ] | 代碼示例DEMO | IP測試視頻 |

                    如何在Java中使用動態代理IP抓取數據

                    作者:數據無憂   時間:2020-09-18 13:24:53

                    下面的代碼演示了如何使用本站提供的動態代理IP抓取數據,程序用到了htmlunit包和多線程技術

                    package com.data5u.test;
                    
                    import java.io.BufferedInputStream;
                    import java.io.ByteArrayOutputStream;
                    import java.io.IOException;
                    import java.io.InputStream;
                    import java.net.HttpURLConnection;
                    import java.nio.charset.StandardCharsets;
                    import java.util.ArrayList;
                    import java.util.List;
                    
                    import com.gargoylesoftware.htmlunit.BrowserVersion;
                    import com.gargoylesoftware.htmlunit.ProxyConfig;
                    import com.gargoylesoftware.htmlunit.WebClient;
                    import com.gargoylesoftware.htmlunit.html.HtmlPage;
                    
                    /**
                     * Demo whose main purpose is to test the stability of dynamic proxy IPs.
                     * It can also serve as a reference crawler project; to reuse it, adapt
                     * {@code Crawler.webParseHtml} to your own needs.
                     *
                     * <p>One {@code GetIP} task periodically replaces {@link #ipList} with a fresh
                     * proxy list while several {@code Crawler} threads pick random entries from it.
                     * All loops stop once {@link #gameOver} becomes {@code true}.
                     */
                    public class TestDynamicIp {
                        /** Upper bound on proxies tried by a single {@code webParseHtml} call. */
                        private static final int MAX_FETCH_ATTEMPTS = 5;

                        /**
                         * Most recently fetched proxy entries. The list is replaced wholesale by
                         * {@code GetIP} and never mutated after publication; {@code volatile} makes
                         * the replacement visible to the crawler threads.
                         */
                        public static volatile List<String> ipList = new ArrayList<>();

                        /** Stop flag polled by every thread; {@code volatile} so updates are seen. */
                        public static volatile boolean gameOver = false;

                        /**
                         * Starts the proxy fetcher and the crawler threads, waits until the fetcher
                         * signals completion through {@link #gameOver}, then exits the JVM.
                         *
                         * @param args unused
                         */
                        public static void main(String[] args) {
                            long fetchIpSeconds = 5;
                            int threadNum = 10;
                            int testTime = 3;
                            // Proxy order number; IPs can only be extracted once this is filled in.
                            String order = "一定要把這里改為單號哦~";
                            // Target URL to crawl.
                            String targetUrl = "http://1212.ip138.com/ic.asp";
                            // Whether to execute JavaScript; enabling it slows requests down.
                            boolean useJS = false;
                            // Request timeout in milliseconds (5 seconds).
                            int timeOut = 5000;

                            if (order == null || "".equals(order)) {
                                System.err.println("請輸入無憂代理IP動態代理訂單號");
                                return;
                            }

                            System.out.println("############無憂代理動態IP測試開始###############");
                            System.out.println("***************");
                            System.out.println("接口返回IP為國內各地區,每次最多返回10個");
                            System.out.println("提取IP間隔 " + fetchIpSeconds + " 秒 ");
                            System.out.println("開啟爬蟲線程 " + threadNum);
                            System.out.println("爬蟲目標網址  " + targetUrl);
                            // Print the configured value instead of a hard-coded "3".
                            System.out.println("測試次數 " + testTime + " ");
                            System.out.println("***************\n");

                            TestDynamicIp tester = new TestDynamicIp();
                            new Thread(tester.new GetIP(fetchIpSeconds * 1000, testTime, order)).start();
                            for (int i = 0; i < threadNum; i++) {
                                tester.new Crawler(100, targetUrl, useJS, timeOut).start();
                            }

                            while (!gameOver) {
                                try {
                                    Thread.sleep(100);
                                } catch (InterruptedException e) {
                                    // Keep the interrupt status and stop waiting.
                                    Thread.currentThread().interrupt();
                                    break;
                                }
                            }
                            System.out.println("###############無憂代理動態IP測試結束###############");
                            System.exit(0);
                        }

                        /**
                         * Extracts proxy entries from the API response body.
                         *
                         * <p>Each line is split on {@code ','}; the first field is kept when the
                         * second field parses as a positive integer (presumably the remaining TTL,
                         * given the {@code ttl} flag in the request URL - confirm against the API).
                         * Lines that do not match are skipped.
                         *
                         * @param body raw response text, one record per line
                         * @return the accepted first fields, possibly empty, never {@code null}
                         */
                        private static List<String> parseProxyList(String body) {
                            List<String> proxies = new ArrayList<>();
                            for (String line : body.split("\n")) {
                                // trim() also drops a trailing '\r' from CRLF responses.
                                String[] parts = line.trim().split(",");
                                if (parts.length < 2) {
                                    continue;
                                }
                                try {
                                    if (Integer.parseInt(parts[1].trim()) > 0) {
                                        proxies.add(parts[0].trim());
                                    }
                                } catch (NumberFormatException ignored) {
                                    // Second field is not a number, so this is not a proxy record; skip it.
                                }
                            }
                            return proxies;
                        }

                        /** Crawler thread: repeatedly requests the target URL through a random proxy. */
                        public class Crawler extends Thread {
                            long sleepMs = 200;
                            boolean useJs = false;
                            String targetUrl = "";
                            int timeOut = 5000;

                            /**
                             * @param sleepMs   pause between two crawl rounds, in milliseconds
                             * @param targetUrl URL to request
                             * @param useJs     whether HtmlUnit should execute JavaScript
                             * @param timeOut   HtmlUnit request timeout, in milliseconds
                             */
                            public Crawler(long sleepMs, String targetUrl, boolean useJs, int timeOut) {
                                this.sleepMs = sleepMs;
                                this.targetUrl = targetUrl;
                                this.useJs = useJs;
                                this.timeOut = timeOut;
                            }

                            /** Crawls the target URL until {@code gameOver} is set or the thread is interrupted. */
                            @Override
                            public void run() {
                                while (!gameOver) {
                                    webParseHtml(targetUrl);
                                    try {
                                        Thread.sleep(sleepMs);
                                    } catch (InterruptedException e) {
                                        Thread.currentThread().interrupt();
                                        return;
                                    }
                                }
                            }

                            /**
                             * Requests {@code url} through a randomly chosen proxy and returns the page as XML.
                             *
                             * <p>On failure another random proxy is tried, up to
                             * {@code MAX_FETCH_ATTEMPTS} times. This replaces the earlier unbounded
                             * recursive retry, which could overflow the stack when every attempt failed.
                             *
                             * @param url the address to request
                             * @return the page XML, or {@code ""} when no proxy is available or all attempts failed
                             */
                            public String webParseHtml(String url) {
                                for (int attempt = 0; attempt < MAX_FETCH_ATTEMPTS && !gameOver; attempt++) {
                                    String ipport = getAProxy();
                                    if (ipport == null) {
                                        // No proxies fetched yet: print a progress dot and let run() wait.
                                        System.out.print(".");
                                        return "";
                                    }
                                    try {
                                        String html = fetchThroughProxy(url, ipport);
                                        System.out.println(getName() + " 使用代理 " + ipport + "請求目標網址返回HTML:" + html);
                                        return html;
                                    } catch (Exception ignored) {
                                        // Failures are not reported (as before); move on to another proxy.
                                    }
                                }
                                return "";
                            }

                            /**
                             * Performs one request with a fresh {@code WebClient} configured for the given proxy.
                             *
                             * @param url    the address to request
                             * @param ipport proxy in {@code host:port} form
                             * @return the page serialized with {@code asXml()}
                             * @throws IOException if the request fails; malformed {@code ipport} values
                             *                     surface as unchecked exceptions
                             */
                            private String fetchThroughProxy(String url, String ipport) throws IOException {
                                BrowserVersion[] versions = {BrowserVersion.INTERNET_EXPLORER_11, BrowserVersion.CHROME, BrowserVersion.FIREFOX_38, BrowserVersion.INTERNET_EXPLORER_8};
                                // Pick a random browser profile for each request.
                                WebClient client = new WebClient(versions[(int) (versions.length * Math.random())]);
                                try {
                                    client.getOptions().setThrowExceptionOnFailingStatusCode(false);
                                    client.getOptions().setJavaScriptEnabled(useJs);
                                    client.getOptions().setCssEnabled(false);
                                    client.getOptions().setThrowExceptionOnScriptError(false);
                                    client.getOptions().setTimeout(timeOut);
                                    client.getOptions().setAppletEnabled(true);
                                    client.getOptions().setGeolocationEnabled(true);
                                    client.getOptions().setRedirectEnabled(true);

                                    String[] hostAndPort = ipport.split(":");
                                    ProxyConfig proxyConfig = new ProxyConfig(hostAndPort[0], Integer.parseInt(hostAndPort[1]));
                                    client.getOptions().setProxyConfig(proxyConfig);

                                    HtmlPage page = client.getPage(url);
                                    return page.asXml();
                                } finally {
                                    client.close();
                                }
                            }

                            /**
                             * Picks a random entry from the current proxy list.
                             *
                             * @return a proxy entry, or {@code null} when the list is empty
                             */
                            private String getAProxy() {
                                // Read the shared reference once so size() and get() see the same list.
                                List<String> snapshot = ipList;
                                if (snapshot.size() > 0) {
                                    return snapshot.get((int) (Math.random() * snapshot.size()));
                                }
                                return null;
                            }
                        }

                        /** Periodically fetches a fresh proxy list and ends the test after {@code maxTime} successful fetches. */
                        public class GetIP implements Runnable {
                            long sleepMs = 1000;
                            int maxTime = 3;
                            String order = "";

                            /**
                             * @param sleepMs pause between two fetches, in milliseconds
                             * @param maxTime number of successful fetches after which the test ends
                             * @param order   order number appended to the API URL
                             */
                            public GetIP(long sleepMs, int maxTime, String order) {
                                this.sleepMs = sleepMs;
                                this.maxTime = maxTime;
                                this.order = order;
                            }

                            /**
                             * Fetches proxies every {@code sleepMs} milliseconds and publishes each
                             * non-empty result to {@code ipList}. After {@code maxTime} successful
                             * fetches, and one more sleep so crawlers can use the last batch, it sets
                             * {@code gameOver}.
                             */
                            @Override
                            public void run() {
                                long getIpTime = 0;
                                while (!gameOver) {
                                    // Honour the configured limit instead of a hard-coded 3 rounds.
                                    if (getIpTime >= maxTime) {
                                        gameOver = true;
                                        break;
                                    }
                                    try {
                                        List<String> fetched = fetchProxies();
                                        if (fetched.size() > 0) {
                                            TestDynamicIp.ipList = fetched;
                                            System.out.println("第" + ++getIpTime + "次獲取動態IP " + fetched.size() + " 個");
                                        }
                                    } catch (Exception e) {
                                        e.printStackTrace();
                                        System.err.println(">>>>>>>>>>>>>>獲取IP出錯");
                                    }
                                    try {
                                        Thread.sleep(sleepMs);
                                    } catch (InterruptedException e) {
                                        // Without the fetcher the test can never finish, so end it.
                                        Thread.currentThread().interrupt();
                                        gameOver = true;
                                        return;
                                    }
                                }
                            }

                            /**
                             * Downloads the proxy list from the API and parses it.
                             *
                             * @return parsed proxy entries, possibly empty
                             * @throws IOException if the request or the read fails
                             */
                            private List<String> fetchProxies() throws IOException {
                                java.net.URL url = new java.net.URL("http://api.ip.data5u.com/dynamic/get.html?order=" + order + "&ttl");
                                // Open the connection once so the timeout applies to the one actually used.
                                HttpURLConnection connection = (HttpURLConnection) url.openConnection();
                                connection.setConnectTimeout(3000);

                                ByteArrayOutputStream body = new ByteArrayOutputStream();
                                try (InputStream in = new BufferedInputStream(connection.getInputStream())) {
                                    // Read to EOF; available() is not the response length.
                                    byte[] chunk = new byte[4096];
                                    int bytesRead;
                                    while ((bytesRead = in.read(chunk)) != -1) {
                                        body.write(chunk, 0, bytesRead);
                                    }
                                } finally {
                                    connection.disconnect();
                                }
                                return parseProxyList(new String(body.toByteArray(), StandardCharsets.UTF_8));
                            }
                        }

                    }

                    無憂代理IP(www.aooseo.com)原創文章,轉載請注明出處。

                    電話:4007-745-096
                    QQ:
                    周一至周日8:30-18:00 技術部電話熱線
                    久久夜色精品国产噜噜亚洲AV_老妇女性较大毛片_888亚洲欧美国产va在线播放_超碰人人透人人爽人人看