service seacat start
service seacat stop
service seacat status
usage: seacatctl command where command is: 1) list list current settings 2) set [port|rcj|vxsmin|vxsmax|pictureHome|captureWaitTime|waterMark|maxMatch|maxWidth|maxHeight] value set config 3) proxy [ on <IP> <PORT> | off ] set or clear proxy setting 4) help print this help info
例1: seacatctl list 列出当前配置信息。 例2: seacatctl set port 4444 设定海猫侦听4444端口, 在此端口中接收请求。 例3: seacatctl set rcj 4444-5555-6666 设置注册码为4444-5555-6666。 例4: seacatctl set vxsmin 5 设定最少启动5个虚拟浏览器。此数字决定了海猫能并发接受多少个抓拍请求。 例5: seacatctl set pictureHome /root 设定默认图片存贮目录为/root。 例6: seacatctl set captureWaitTime 1500 设定默认抓拍等待时间为1500毫秒。 例7: seacatctl set waterMark zhuatang.com 设定全图抓拍时加注的水印文字为zhuatang.com。 例8: seacatctl proxy on 192.168.28.91 8080 设定使用代理服务器192.168.28.91,端口8080。 例9: seacatctl proxy off 清除代理设置,不使用代理了。 例10: seacatctl set maxMatch 2 设定viewport-class和viewport-xpath最多匹配的元素个数为2,超出的元素被忽略,不会被抓拍。 例11: seacatctl set maxWidth 1200 设定全图最大宽度为1200像素,若全图实际宽度超出此设定值,则海猫不予抓取。maxWidth为0时表示无限制。 例12: seacatctl set maxHeight 2000 设定全图最大高度为2000像素,若全图实际高度超出此设定值,则海猫不予抓取。maxHeight为0时表示无限制。 注:vxsmax暂无意义,系统保留。
usage: capture [-h host] [-p port] [-f] [-s WWWxHHH...] [-w timeToWait] [-o imageFormat] [-P prefix] [-i ids ] [-c classes] [-x xpath] [-r rects] url1 url2 ...
选项: -h host : 海猫所在主机,默认值:localhost -p port : 海猫所在端口,默认值:5060 -f : 设定抓拍全图 -s WWWxHHH... : 缩略图宽高列表,WWW为宽度,HHH为高度 -w timeToWait : 设定抓拍前等待时间,单位:毫秒 -o imageFormat : 指定图片格式,默认为png -P prefix : 图片文件前缀 -i ids : ID值列表,指定一个或多个要抓拍的元素ID -c classes : class值列表,指定一个或多个要抓拍的元素class名 -x xpath : xpath表达式,此表达式可匹配多个要抓拍的元素 -r rects: 指定一个或多个矩形区域,默认值"0 0 1024 768",意即左上角坐标(0,0),宽1024,高768 url1 url2...为需要抓拍的网址
输出:
<url> {+++|---} <scid>
+++ 表示抓拍成功
--- 表示抓拍失败
例1: capture http://www.zhuatang.com 例2: capture -s "109x82 330x220" http://www.zhuatang.com 例3: capture -f -s "82x78 400x300" http://www.zhuatang.com http://www.zaobao.com
默认侦听端口: 5060
客户端抓拍网页时请发送如下请求(每行以换行符<LF>结束,遇到空行表示请求结束)
get <url> <LF>
viewport-id <id1> <id2> ... 或 viewport-class <class1> <class2> ... 或 viewport-xpath <xpath> 或 viewport-rect <x> <y> <width> <height>... <LF>
thumb-sizes <size1> <size2> ... <LF>
enable-full <LF>
keep-origin <LF>
picture-home <pictureHome> <LF>
prefix <prefix> <LF>
suffix <suffix> <LF>
content-length <length> <LF>
<LF>
<contents>
注:<url>是你要抓拍的网址,必选项; <id1> <id2>...是一个或多个页面元素id属性值。 <class1> <class2> ...是一个或多个页面元素class属性值,若值中有空格,须用单引号括住。 <xpath> 是一个xpath表达式,用于找到相匹配的元素。 <x>和<y>是抓拍区域左上角坐标,<width>和<height>是宽高。 <sizeN>是缩略图大小,格式为WWWxHHH,WWW是图片宽度,HHH是图片高度; 有enable-full时,海猫将会抓取整个页面; <pictureHome>可指定图片存贮目录; <prefix>可指定图片文件名前缀。 有keep-origin时,海猫将保留生成缩略图的原始图片。 <suffix>可指定图片后缀,亦即图片格式。 当某个网址需要以POST方式提交某些参数时,需要执行content-length命令,设定<length>和<contents>,前者为参数编码后的长度,后者为编码后的参数。
海猫回应信息(成功时)
SEACAT/5.5 200 OK <LF>
SCID: <SCID> <LF>
<LF>
注: <SCID>是图片文件名前缀(含路径),图片文件名格式为<SCID>-WWWxHHH.<suffix>, 整个页面所对应的图片文件名为<SCID>-full.<suffix>。<suffix>为图片格式,默认为png。
海猫回应信息(抓拍失败时)
SEACAT/5.5 <code> <message><LF>
<LF>
package com.zhsoft88.commons;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.Socket;
import java.util.Arrays;
import org.apache.commons.lang.math.NumberUtils;
/**
 * Client for the seacat web page capture service.
 * <p>
 * A {@link SeacatConf} describes one capture request; {@link #capture(SeacatConf)}
 * sends it over a plain TCP socket as a list of text commands terminated by an
 * empty line, parses the status line and headers of the reply, and returns them
 * as a {@link SeacatResult}.
 *
 * @author zhsoft88
 * @since 2008-4-13
 * @update 2008-12-13
 */
public class Seacat {
    /** Port used when none is given to the constructor. */
    public static final int PORT = 5060;

    /** Charset used for both the request and the response. */
    private static final String CHARSET = "utf-8";

    /**
     * Outcome of a single capture request.
     * <p>
     * Setters are protected: instances are filled in by {@link Seacat#capture(SeacatConf)}.
     *
     * @author zhsoft88
     * @since 2008-4-13
     */
    public static class SeacatResult {
        private int statusCode;
        private String statusText;
        private String scid;
        private String geometry;
        private long elaspedTime;
        private String prefix;
        private String pictureHome;
        // Never left null by default so that a fresh result can be dumped safely.
        private String[] suffixList = new String[0];

        public SeacatResult() {
        }

        /** @return numeric status from the response status line (0 if it was not a number) */
        public int getStatusCode() {
            return statusCode;
        }

        /** @return text following the status code on the response status line */
        public String getStatusText() {
            return statusText;
        }

        /** @return value of the {@code SCID} response header, or {@code null} if absent */
        public String getScid() {
            return scid;
        }

        /** @return value of the {@code Geometry} response header, or {@code null} if absent */
        public String getGeometry() {
            return geometry;
        }

        /** @return wall-clock duration of the capture call in milliseconds */
        public long getElaspedTime() {
            return elaspedTime;
        }

        /** @return part of the SCID after its last {@code '/'}, or {@code null} without an SCID */
        public String getPrefix() {
            return prefix;
        }

        /** @return directory part of the SCID, or {@code null} without an SCID */
        public String getPictureHome() {
            return pictureHome;
        }

        /** @return sorted remainders of the local file names that start with the prefix */
        public String[] getSuffixList() {
            return suffixList;
        }

        protected void setStatusCode(int statusCode) {
            this.statusCode = statusCode;
        }

        protected void setStatusText(String statusText) {
            this.statusText = statusText;
        }

        protected void setScid(String scid) {
            this.scid = scid;
        }

        protected void setGeometry(String geometry) {
            this.geometry = geometry;
        }

        protected void setElaspedTime(long elaspedTime) {
            this.elaspedTime = elaspedTime;
        }

        protected void setPrefix(String prefix) {
            this.prefix = prefix;
        }

        protected void setPictureHome(String pictureHome) {
            this.pictureHome = pictureHome;
        }

        protected void setSuffixList(String[] suffixList) {
            this.suffixList = suffixList;
        }

        /**
         * Prints the result fields to standard output, one per line.
         * A {@code null} suffix list is reported as an empty one.
         */
        public void dump() {
            String[] suffixes = suffixList == null ? new String[0] : suffixList;
            System.out.println("SeacatResult:");
            System.out.println("statusCode="+statusCode);
            System.out.println("statusText="+statusText);
            System.out.println("scid="+scid);
            System.out.println("geometry="+geometry);
            System.out.println("prefix="+prefix);
            System.out.println("suffixList length="+suffixes.length);
            for (int i = 0; i < suffixes.length; i++) {
                System.out.println("suffixList["+i+"]="+suffixes[i]);
            }
        }
    }

    /**
     * Parameters of a single capture request. Every non-null / enabled property
     * is sent to the service as one command line by {@link Seacat#capture(SeacatConf)}.
     *
     * @author zhsoft88
     * @since 2008-4-13
     * @update 2008-12-13
     */
    public static class SeacatConf {
        private String url;
        private String viewportId;
        private String viewportClass;
        private String viewportXpath;
        private String viewportRect;
        private String thumbSizes;
        private int waitTime;
        private boolean enableFull;
        private String pictureHome;
        private String prefix;
        private String suffix;
        private String postData;
        private boolean keepOrigin;

        /**
         * Creates a configuration with no wait time ({@code -1}, i.e. not sent),
         * full-page capture and keep-origin disabled, and {@code /tmp} as picture home.
         */
        public SeacatConf() {
            waitTime = -1;
            enableFull = false;
            keepOrigin = false;
            pictureHome = "/tmp";
        }

        public String getUrl() {
            return url;
        }

        public void setUrl(String url) {
            this.url = url;
        }

        public String getThumbSizes() {
            return thumbSizes;
        }

        public void setThumbSizes(String thumbSizes) {
            this.thumbSizes = thumbSizes;
        }

        public int getWaitTime() {
            return waitTime;
        }

        /** @param waitTime value for the {@code wait-time} command; {@code -1} suppresses the command */
        public void setWaitTime(int waitTime) {
            this.waitTime = waitTime;
        }

        public boolean isEnableFull() {
            return enableFull;
        }

        public void setEnableFull(boolean enableFull) {
            this.enableFull = enableFull;
        }

        public String getPictureHome() {
            return pictureHome;
        }

        public void setPictureHome(String pictureHome) {
            this.pictureHome = pictureHome;
        }

        public String getPrefix() {
            return prefix;
        }

        public void setPrefix(String prefix) {
            this.prefix = prefix;
        }

        public String getSuffix() {
            return suffix;
        }

        public void setSuffix(String suffix) {
            this.suffix = suffix;
        }

        public String getPostData() {
            return postData;
        }

        public void setPostData(String postData) {
            this.postData = postData;
        }

        public String getViewportId() {
            return viewportId;
        }

        public void setViewportId(String viewportId) {
            this.viewportId = viewportId;
        }

        public String getViewportClass() {
            return viewportClass;
        }

        public void setViewportClass(String viewportClass) {
            this.viewportClass = viewportClass;
        }

        public String getViewportXpath() {
            return viewportXpath;
        }

        public void setViewportXpath(String viewportXpath) {
            this.viewportXpath = viewportXpath;
        }

        public String getViewportRect() {
            return viewportRect;
        }

        public void setViewportRect(String viewportRect) {
            this.viewportRect = viewportRect;
        }

        public boolean isKeepOrigin() {
            return keepOrigin;
        }

        public void setKeepOrigin(boolean keepOrigin) {
            this.keepOrigin = keepOrigin;
        }
    }

    private String host;
    private int port;

    /** Creates a client for {@code localhost} on the default {@link #PORT}. */
    public Seacat() {
        this("localhost");
    }

    /**
     * Creates a client for the given host on the default {@link #PORT}.
     *
     * @param host host the service runs on
     */
    public Seacat(String host) {
        this(host,PORT);
    }

    /**
     * Creates a client for the given host and port.
     *
     * @param host host the service runs on
     * @param port port the service listens on
     */
    public Seacat(String host,int port) {
        this.host = host;
        this.port = port;
    }

    /**
     * Sends one capture request and returns the parsed reply.
     * <p>
     * The socket is always closed, also when the exchange fails. When the reply
     * carries an {@code SCID} header, the directory named by the SCID is listed
     * locally to collect the suffixes of the generated files; if that directory
     * cannot be listed (for example because the service runs on another machine)
     * the suffix list is empty.
     *
     * @param conf request parameters
     * @return status, SCID, geometry, elapsed time and generated file suffixes
     * @throws IOException if the connection fails or the service closes it
     *         without sending a status line
     * @throws Exception declared for backward compatibility
     */
    public SeacatResult capture(SeacatConf conf) throws Exception {
        long t1 = System.currentTimeMillis();
        int statusCode;
        String statusText;
        String scid = null;
        String geometry = null;

        Socket socket = new Socket(host,port);
        try {
            BufferedWriter bw = new BufferedWriter(
                    new OutputStreamWriter(socket.getOutputStream(), CHARSET));
            writeRequest(bw, conf);

            BufferedReader br = new BufferedReader(
                    new InputStreamReader(socket.getInputStream(), CHARSET));
            String line = br.readLine();
            if (line == null) {
                throw new IOException("seacat at " + host + ":" + port
                        + " closed the connection without sending a response");
            }

            // Status line layout: <protocol> <code> <text>
            int pos = skipSpaces(line, skipToken(line, 0));
            int codeEnd = skipToken(line, pos);
            statusCode = parseIntOrZero(line.substring(pos, codeEnd));
            statusText = line.substring(skipSpaces(line, codeEnd));

            // Headers run until an empty line or end of stream.
            while ((line = br.readLine()) != null) {
                if (line.length() == 0) {
                    break;
                }
                if (line.startsWith("SCID: ")) {
                    scid = line.substring(6);
                } else if (line.startsWith("Geometry: ")) {
                    geometry = line.substring(10);
                }
            }
        } finally {
            socket.close();
        }

        String pictureHome = null;
        String prefix = null;
        String[] suffixList = new String[0];
        if (scid != null) {
            int slash = scid.lastIndexOf('/');
            prefix = scid.substring(slash + 1);
            // Without a path in the SCID, fall back to the directory that was requested.
            pictureHome = slash >= 0 ? scid.substring(0, slash) : conf.getPictureHome();
            // An SCID directly below the root yields an empty directory part.
            String dir = slash == 0 ? "/" : pictureHome;
            if (dir != null) {
                suffixList = listSuffixes(new File(dir), prefix);
            }
        }
        long t2 = System.currentTimeMillis();

        SeacatResult result = new SeacatResult();
        result.setStatusCode(statusCode);
        result.setStatusText(statusText);
        result.setScid(scid);
        result.setElaspedTime(t2-t1);
        result.setGeometry(geometry);
        result.setPictureHome(pictureHome);
        result.setPrefix(prefix);
        result.setSuffixList(suffixList);
        return result;
    }

    /** Writes all commands of the request, the terminating empty line and the post data, then flushes. */
    private static void writeRequest(BufferedWriter bw, SeacatConf conf) throws IOException {
        writeCommand(bw, "get", conf.getUrl());
        writeCommand(bw, "viewport-id", conf.getViewportId());
        writeCommand(bw, "viewport-class", conf.getViewportClass());
        writeCommand(bw, "viewport-xpath", conf.getViewportXpath());
        writeCommand(bw, "viewport-rect", conf.getViewportRect());
        writeCommand(bw, "thumb-sizes", conf.getThumbSizes());
        if (conf.isEnableFull()) {
            bw.write("enable-full\r\n");
        }
        if (conf.getWaitTime() != -1) {
            bw.write("wait-time "+conf.getWaitTime()+"\r\n");
        }
        writeCommand(bw, "picture-home", conf.getPictureHome());
        writeCommand(bw, "prefix", conf.getPrefix());
        writeCommand(bw, "suffix", conf.getSuffix());

        String postData = conf.getPostData();
        boolean hasPostData = postData != null && postData.length() > 0;
        if (hasPostData) {
            // NOTE(review): this is the character count; it equals the byte count
            // only if the post data is already encoded to ASCII - confirm with callers.
            bw.write("content-length "+postData.length()+"\r\n");
        }
        if (conf.isKeepOrigin()) {
            bw.write("keep-origin\r\n");
        }
        bw.write("\r\n");
        if (hasPostData) {
            bw.write(postData);
        }
        bw.flush();
    }

    /** Writes {@code "<name> <value>\r\n"} unless the value is null. */
    private static void writeCommand(BufferedWriter bw, String name, String value) throws IOException {
        if (value != null) {
            bw.write(name+" "+value+"\r\n");
        }
    }

    /** Returns the index of the first space at or after {@code from}, or the length of the text. */
    private static int skipToken(String text, int from) {
        int i = from;
        while (i < text.length() && text.charAt(i) != ' ') {
            i++;
        }
        return i;
    }

    /** Returns the index of the first non-space character at or after {@code from}, or the length of the text. */
    private static int skipSpaces(String text, int from) {
        int i = from;
        while (i < text.length() && text.charAt(i) == ' ') {
            i++;
        }
        return i;
    }

    /** Parses a decimal integer, returning 0 when the text is not a valid number. */
    private static int parseIntOrZero(String text) {
        try {
            return Integer.parseInt(text);
        } catch (NumberFormatException e) {
            return 0;
        }
    }

    /**
     * Lists the files in {@code dir} whose names start with {@code prefix} and
     * returns, sorted, what follows the prefix and its one separator character.
     * Returns an empty array when the directory cannot be listed.
     */
    private static String[] listSuffixes(File dir, final String prefix) {
        File[] matches = dir.listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File parent, String name) {
                // A name equal to the prefix has no separator to skip.
                return name.length() > prefix.length() && name.startsWith(prefix);
            }
        });
        if (matches == null) {
            return new String[0];
        }
        String[] suffixes = new String[matches.length];
        for (int k = 0; k < matches.length; k++) {
            suffixes[k] = matches[k].getName().substring(prefix.length() + 1);
        }
        Arrays.sort(suffixes);
        return suffixes;
    }
}
package com.zhsoft88.commons.tests;
import com.zhsoft88.commons.Seacat;
import com.zhsoft88.commons.Seacat.SeacatConf;
import com.zhsoft88.commons.Seacat.SeacatResult;
/**
 * Manual driver for {@link Seacat}: runs four captures against a seacat
 * service on localhost (by element id, by class, by xpath and by rectangle)
 * and dumps each result to standard output.
 *
 * @author zhsoft88
 *
 * @since 2008-12-13
 */
public class TestSeacat {
    /**
     * Runs the four capture scenarios in order; the first failure aborts the run.
     *
     * @param args ignored
     */
    public static void main(String[] args) throws Exception {
        Seacat client = new Seacat();

        // get snapshot by id
        SeacatConf byId = newConf("http://www.google.com");
        byId.setViewportId("gbar");
        captureAndDump(client, byId);

        // get snapshot by class, together with the full page
        SeacatConf byClass = newConf("http://news.google.com");
        byClass.setViewportClass("lh");
        byClass.setEnableFull(true);
        captureAndDump(client, byClass);

        // get snapshot by xpath
        SeacatConf byXpath = newConf("http://news.google.com");
        byXpath.setViewportXpath("//*[@id='topSection']");
        captureAndDump(client, byXpath);

        // get snapshot by specific region
        SeacatConf byRect = newConf("http://news.google.com");
        byRect.setViewportRect("200 200 400 300");
        captureAndDump(client, byRect);
    }

    /** Builds the settings shared by all scenarios: url, one 160x120 thumbnail, keep-origin on. */
    private static SeacatConf newConf(String url) {
        SeacatConf conf = new SeacatConf();
        conf.setUrl(url);
        conf.setThumbSizes("160x120");
        conf.setKeepOrigin(true);
        return conf;
    }

    /** Performs one capture and prints its result. */
    private static void captureAndDump(Seacat client, SeacatConf conf) throws Exception {
        SeacatResult outcome = client.capture(conf);
        outcome.dump();
    }
}