`
danandyu8013
  • 浏览: 37695 次
  • 性别: Icon_minigender_2
  • 来自: 北京
社区版块
存档分类
最新评论

抓网页数据

    博客分类:
  • JAVA
阅读更多

抓取网页

 

 

/**
 * Fetches the weather page for the given weather code.
 *
 * <p>The URL is built as the {@code WEATHER_JSON} configuration value followed by
 * {@code weatherCode} and the {@code ".html"} suffix. The page is downloaded via
 * {@link #getPageContent(String, String, int)} with an empty POST body.
 *
 * @param weatherCode code appended to the configured root to form the page URL
 * @return the value returned by {@code getPageContent} for the built URL
 */
public String getPageWeather(String weatherCode) {
    // Upper bound on the number of characters read from the response.
    final int maxContentLength = 50000000;

    String weatherRoot = sysConf.getValue("WEATHER_JSON");
    String weatherUrl = weatherRoot + weatherCode + ".html";

    logger.info("网页抓取开始,地址 【" + weatherUrl + " 】");

    // Empty POST body: getPageContent only writes a request body when it is non-empty.
    String sresult = this.getPageContent(weatherUrl, "", maxContentLength);

    // Emit the fetched page through the logger rather than stdout.
    logger.info(sresult);

    return sresult;
}

 

 

 

 

public String getPageContent(String strUrl, String strPostRequest, int maxLength) {

// 读取结果网页

StringBuffer buffer = new StringBuffer();

try {

URL newUrl = new URL(strUrl);

HttpURLConnection hConnect = (HttpURLConnection) newUrl.openConnection();

hConnect.setReadTimeout(Integer.parseInt(sysConf.getValue("TIME_OUT")));

// POST方式的额外数据

if (strPostRequest.length() > 0) {

hConnect.setDoOutput(true);

OutputStreamWriter out = new OutputStreamWriter(hConnect.getOutputStream());

out.write(strPostRequest);

out.flush();

out.close();

}

// 读取内容

 

BufferedReader rd = new BufferedReader(new InputStreamReader(hConnect.getInputStream(), "utf-8"));

int ch;

for (int length = 0; (ch = rd.read()) > -1 && (maxLength <= 0 || length < maxLength); length++)

buffer.append((char) ch);

rd.close();

hConnect.disconnect();

return buffer.toString().trim();

} catch (Exception e) {

logger.info("对方主动关闭socket连接,放弃抓取--" );//+ e.getMessage(), e);

//e.printStackTrace();

return "";

 

}

}

分享到:
评论
1 楼 spp_1987 2013-04-23  
如果我想做一个自动抓取网页的程序,那抓取到的数据应该怎么存放?

相关推荐

Global site tag (gtag.js) - Google Analytics