service seacat start
service seacat stop
service seacat status
usage: seacatctl command where command is: 1) list list current settings 2) set [port|rcj|vxsmin|vxsmax|pictureHome|captureWaitTime|waterMark|maxMatch|maxWidth|maxHeight] value set config 3) proxy [ on <IP> <PORT> | off ] set or clear proxy setting 4) help print this help info
例1: seacatctl list 列出当前配置信息。 例2: seacatctl set port 4444 设定海猫侦听4444端口, 在此端口中接收请求。 例3: seacatctl set rcj 4444-5555-6666 设置注册码为4444-5555-6666。 例4: seacatctl set vxsmin 5 设定最少启动5个虚拟浏览器。此数字决定了海猫能并发接受多少个抓拍请求。 例5: seacatctl set pictureHome /root 设定默认图片存贮目录为/root。 例6: seacatctl set captureWaitTime 1500 设定默认抓拍等待时间为1500毫秒。 例7: seacatctl set waterMark zhuatang.com 设定全图抓拍时加注的水印文字为zhuatang.com。 例8: seacatctl proxy on 192.168.28.91 8080 设定使用代理服务器192.168.28.91,端口8080。 例9: seacatctl proxy off 清除代理设置,不使用代理了。 例10: seacatctl set maxMatch 2 设定viewport-class和viewport-xpath最多匹配的元素个数为2,超出的元素被忽略,不会被抓拍。 例11: seacatctl set maxWidth 1200 设定全图最大宽度为1200像素,若全图实际宽度超出此设定值,则海猫不予抓取。maxWidth为0时表示无限制。 例12: seacatctl set maxHeight 2000 设定全图最大高度为2000像素,若全图实际高度超出此设定值,则海猫不予抓取。maxHeight为0时表示无限制。 注:vxsmax暂无意义,系统保留。
usage: capture [-h host] [-p port] [-f] [-s WWWxHHH...] [-w timeToWait] [-o imageFormat] [-P prefix] [-i ids] [-c classes] [-x xpath] [-r rects] url1 url2 ...
选项: -h host : 海猫所在主机,默认值:localhost -p port : 海猫所在端口,默认值:5060 -f : 设定抓拍全图 -s WWWxHHH... : 缩略图宽高列表,WWW为宽度,HHH为高度 -w timeToWait : 设定抓拍前等待时间,单位:毫秒 -o imageFormat : 指定图片格式,默认为png -P prefix : 图片文件前缀 -i ids : ID值列表,指定一个或多个要抓拍的元素ID -c classes : class值列表,指定一个或多个要抓拍的元素class名 -x xpath : xpath表达式,此表达式可匹配多个要抓拍的元素 -r rects: 指定一个或多个矩形区域,默认值"0 0 1024 768",意即左上角坐标(0,0),宽1024,高768 url1 url2...为需要抓拍的网址
输出: <url> {+++|---} <scid> +++ 表示抓拍成功 --- 表示抓拍失败
例1: capture http://www.zhuatang.com 例2: capture -s "109x82 330x220" http://www.zhuatang.com 例3: capture -f -s "82x78 400x300" http://www.zhuatang.com http://www.zaobao.com
默认侦听端口: 5060
客户端抓拍网页时请发送如下请求(每行以换行符<LF>结束,遇到空行表示请求结束)
get <url> <LF>
viewport-id <id1> <id2> ... 或 viewport-class <class1> <class2> ... 或 viewport-xpath <xpath> 或 viewport-rect <x> <y> <width> <height>... <LF>
thumb-sizes <size1> <size2> ... <LF>
enable-full <LF>
keep-origin <LF>
picture-home <pictureHome> <LF>
prefix <prefix> <LF>
suffix <suffix> <LF>
content-length <length> <LF>
<LF>
<contents>
注:<url>是你要抓拍的网址,必选项; <id1> <id2>...是一个或多个页面元素id属性值。 <class1> <class2> ...是一个或多个页面元素class属性值,若值中有空格,须用单引号括住。 <xpath> 是一个xpath表达式,用于找到相匹配的元素。 <x>和<y>是抓拍区域左上角坐标,<width>和<height>是宽高。 <sizeN>是缩略图大小,格式为WWWxHHH,WWW是图片宽度,HHH是图片高度; 有enable-full时,海猫将会抓取整个页面; <pictureHome>可指定图片存贮目录; <prefix>可指定图片文件名前缀。 有keep-origin时,海猫将保留生成缩略图的原始图片。 <suffix>可指定图片后缀,亦即图片格式。 当某个网址需要以POST方式提交某些参数时,需要执行content-length命令,设定<length>和<contents>,前者为参数编码后的长度,后者为编码后的参数。
海猫回应信息(成功时)
SEACAT/5.5 200 OK <LF>
SCID: <SCID> <LF>
<LF>
注: <SCID>是图片文件名前缀(含路径),图片文件名格式为<SCID>-WWWxHHH.<suffix>, 整个页面所对应的图片文件名为<SCID>-full.<suffix>。<suffix>为图片格式,默认为png。
海猫回应信息(抓拍失败时)
SEACAT/5.5 <code> <message><LF>
<LF>
package com.zhsoft88.commons;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.Socket;
import java.util.Arrays;

import org.apache.commons.lang.math.NumberUtils;

/**
 * Client for the seacat capture service.
 *
 * <p>Each call to {@link #capture(SeacatConf)} opens one socket to the configured
 * host and port, sends a line-based request, reads the status line and response
 * headers, and then looks on the local file system for the image files that belong
 * to the returned SCID.
 *
 * @author zhsoft88
 * @since 2008-4-13
 * @update 2008-12-13
 */
public class Seacat {

  /** Port used when none is passed to the constructor. */
  public static final int PORT = 5060;

  /** Charset used for both the request and the response. */
  private static final String CHARSET = "UTF-8";

  /** Line terminator written after every request line (this client has always sent CRLF). */
  private static final String EOL = "\r\n";

  private static final String SCID_HEADER = "SCID: ";
  private static final String GEOMETRY_HEADER = "Geometry: ";

  /**
   * Outcome of one capture request.
   *
   * <p>Instances are filled in by {@link Seacat#capture(SeacatConf)}; the setters are
   * {@code protected} so ordinary callers only read the values.
   *
   * @author zhsoft88
   * @since 2008-4-13
   */
  public static class SeacatResult {

    private static final String[] NO_SUFFIXES = new String[0];

    private int statusCode;
    private String statusText;
    private String scid;
    private String geometry;
    private long elaspedTime;
    private String prefix;
    private String pictureHome;
    // Never null, so that dump() and callers can iterate without a null check.
    private String[] suffixList = NO_SUFFIXES;

    public SeacatResult() {
    }

    /** @return numeric status code taken from the response status line (0 if it was not a number) */
    public int getStatusCode() {
      return statusCode;
    }

    /** @return text that follows the status code on the response status line */
    public String getStatusText() {
      return statusText;
    }

    /** @return value of the {@code SCID:} response header, or {@code null} if none was sent */
    public String getScid() {
      return scid;
    }

    /** @return value of the {@code Geometry:} response header, or {@code null} if none was sent */
    public String getGeometry() {
      return geometry;
    }

    /**
     * @return wall-clock milliseconds spent inside {@code capture}
     *         (the method name keeps its historical spelling for compatibility)
     */
    public long getElaspedTime() {
      return elaspedTime;
    }

    /** @return part of the SCID after the last {@code '/'}, or {@code null} without an SCID */
    public String getPrefix() {
      return prefix;
    }

    /** @return directory part of the SCID, or {@code null} without an SCID */
    public String getPictureHome() {
      return pictureHome;
    }

    /** @return sorted file-name remainders found for the SCID prefix; never {@code null} */
    public String[] getSuffixList() {
      return suffixList;
    }

    protected void setStatusCode(int statusCode) {
      this.statusCode = statusCode;
    }

    protected void setStatusText(String statusText) {
      this.statusText = statusText;
    }

    protected void setScid(String scid) {
      this.scid = scid;
    }

    protected void setGeometry(String geometry) {
      this.geometry = geometry;
    }

    protected void setElaspedTime(long elaspedTime) {
      this.elaspedTime = elaspedTime;
    }

    protected void setPrefix(String prefix) {
      this.prefix = prefix;
    }

    protected void setPictureHome(String pictureHome) {
      this.pictureHome = pictureHome;
    }

    /** Stores the suffix list; {@code null} is stored as an empty array. */
    protected void setSuffixList(String[] suffixList) {
      this.suffixList = suffixList == null ? NO_SUFFIXES : suffixList;
    }

    /** Prints the result fields to standard output, one per line. */
    public void dump() {
      System.out.println("SeacatResult:");
      System.out.println("statusCode="+statusCode);
      System.out.println("statusText="+statusText);
      System.out.println("scid="+scid);
      System.out.println("geometry="+geometry);
      System.out.println("prefix="+prefix);
      System.out.println("suffixList length="+suffixList.length);
      for (int i=0;i<suffixList.length;i++) {
        System.out.println("suffixList["+i+"]="+suffixList[i]);
      }
    }
  }

  /**
   * Parameters of one capture request.
   *
   * <p>A {@code null} string property (and a wait time of {@code -1}) means that the
   * corresponding request line is not sent at all.
   *
   * @author zhsoft88
   * @since 2008-4-13
   * @update 2008-12-13
   */
  public static class SeacatConf {

    private String url;
    private String viewportId;
    private String viewportClass;
    private String viewportXpath;
    private String viewportRect;
    private String thumbSizes;
    private int waitTime;
    private boolean enableFull;
    private String pictureHome;
    private String prefix;
    private String suffix;
    private String postData;
    private boolean keepOrigin;

    /** Creates a configuration with no wait time, no full capture, no keep-origin and picture home {@code /tmp}. */
    public SeacatConf() {
      waitTime = -1;
      enableFull = false;
      keepOrigin = false;
      pictureHome = "/tmp";
    }

    public String getUrl() {
      return url;
    }

    public void setUrl(String url) {
      this.url = url;
    }

    public String getThumbSizes() {
      return thumbSizes;
    }

    public void setThumbSizes(String thumbSizes) {
      this.thumbSizes = thumbSizes;
    }

    public int getWaitTime() {
      return waitTime;
    }

    public void setWaitTime(int waitTime) {
      this.waitTime = waitTime;
    }

    public boolean isEnableFull() {
      return enableFull;
    }

    public void setEnableFull(boolean enableFull) {
      this.enableFull = enableFull;
    }

    public String getPictureHome() {
      return pictureHome;
    }

    public void setPictureHome(String pictureHome) {
      this.pictureHome = pictureHome;
    }

    public String getPrefix() {
      return prefix;
    }

    public void setPrefix(String prefix) {
      this.prefix = prefix;
    }

    public String getSuffix() {
      return suffix;
    }

    public void setSuffix(String suffix) {
      this.suffix = suffix;
    }

    public String getPostData() {
      return postData;
    }

    public void setPostData(String postData) {
      this.postData = postData;
    }

    public String getViewportId() {
      return viewportId;
    }

    public void setViewportId(String viewportId) {
      this.viewportId = viewportId;
    }

    public String getViewportClass() {
      return viewportClass;
    }

    public void setViewportClass(String viewportClass) {
      this.viewportClass = viewportClass;
    }

    public String getViewportXpath() {
      return viewportXpath;
    }

    public void setViewportXpath(String viewportXpath) {
      this.viewportXpath = viewportXpath;
    }

    public String getViewportRect() {
      return viewportRect;
    }

    public void setViewportRect(String viewportRect) {
      this.viewportRect = viewportRect;
    }

    public boolean isKeepOrigin() {
      return keepOrigin;
    }

    public void setKeepOrigin(boolean keepOrigin) {
      this.keepOrigin = keepOrigin;
    }
  }

  private String host;
  private int port;

  /** Creates a client for {@code localhost} on the default {@link #PORT}. */
  public Seacat() {
    this("localhost");
  }

  /** Creates a client for the given host on the default {@link #PORT}. */
  public Seacat(String host) {
    this(host,PORT);
  }

  /** Creates a client for the given host and port. */
  public Seacat(String host,int port) {
    this.host = host;
    this.port = port;
  }

  /**
   * Sends one capture request and returns what the server answered.
   *
   * <p>The socket is always closed before this method returns, including when
   * writing the request or reading the response fails.
   *
   * <p>When the response carries an SCID, the directory part of that SCID is listed
   * on the local file system to build the suffix list. If the directory cannot be
   * listed (for example because the server runs on another machine), the suffix list
   * is empty rather than an error.
   *
   * @param conf request parameters; must not be {@code null}
   * @return the parsed response, never {@code null}
   * @throws IOException if the connection fails or the server closes it without
   *         sending a status line
   * @throws Exception declared for backward compatibility with existing callers
   */
  public SeacatResult capture(SeacatConf conf) throws Exception {
    if (conf == null) {
      throw new NullPointerException("conf must not be null");
    }
    long startMillis = System.currentTimeMillis();

    int statusCode;
    String statusText;
    String scid = null;
    String geometry = null;

    Socket socket = new Socket(host,port);
    try {
      // Use one explicit charset in both directions instead of mixing the
      // platform default (request) with UTF-8 (response).
      BufferedWriter out = new BufferedWriter(
          new OutputStreamWriter(socket.getOutputStream(), CHARSET));
      writeRequest(out, conf);
      out.flush();

      BufferedReader in = new BufferedReader(
          new InputStreamReader(socket.getInputStream(), CHARSET));
      String statusLine = in.readLine();
      if (statusLine == null) {
        throw new IOException("seacat at " + host + ":" + port
            + " closed the connection without sending a response");
      }

      // Status line layout: <protocol> <code> <text>
      int codeStart = skipSpaces(statusLine, skipToken(statusLine, 0));
      int codeEnd = skipToken(statusLine, codeStart);
      statusCode = NumberUtils.toInt(statusLine.substring(codeStart, codeEnd));
      statusText = statusLine.substring(skipSpaces(statusLine, codeEnd));

      // Headers run until the first empty line or end of stream.
      String header;
      while ((header = in.readLine()) != null && header.length() > 0) {
        if (header.startsWith(SCID_HEADER)) {
          scid = header.substring(SCID_HEADER.length());
        } else if (header.startsWith(GEOMETRY_HEADER)) {
          geometry = header.substring(GEOMETRY_HEADER.length());
        }
      }
    } finally {
      socket.close();
    }

    String pictureHome = null;
    String prefix = null;
    String[] suffixList = new String[0];
    if (scid != null) {
      int slash = scid.lastIndexOf('/');
      if (slash >= 0) {
        // A leading slash only means the files live directly under the root.
        pictureHome = slash == 0 ? "/" : scid.substring(0, slash);
        prefix = scid.substring(slash + 1);
      } else {
        // SCID without a directory part: fall back to the directory we asked for.
        pictureHome = conf.getPictureHome();
        prefix = scid;
      }
      suffixList = listSuffixes(pictureHome, prefix);
    }

    SeacatResult result = new SeacatResult();
    result.setStatusCode(statusCode);
    result.setStatusText(statusText);
    result.setScid(scid);
    result.setElaspedTime(System.currentTimeMillis() - startMillis);
    result.setGeometry(geometry);
    result.setPictureHome(pictureHome);
    result.setPrefix(prefix);
    result.setSuffixList(suffixList);
    return result;
  }

  /** Writes the request lines, the terminating empty line and the optional POST body. */
  private static void writeRequest(BufferedWriter out, SeacatConf conf) throws IOException {
    writeLine(out, "get", conf.getUrl());
    writeLine(out, "viewport-id", conf.getViewportId());
    writeLine(out, "viewport-class", conf.getViewportClass());
    writeLine(out, "viewport-xpath", conf.getViewportXpath());
    writeLine(out, "viewport-rect", conf.getViewportRect());
    writeLine(out, "thumb-sizes", conf.getThumbSizes());
    if (conf.isEnableFull()) {
      out.write("enable-full" + EOL);
    }
    if (conf.getWaitTime() != -1) {
      out.write("wait-time " + conf.getWaitTime() + EOL);
    }
    writeLine(out, "picture-home", conf.getPictureHome());
    writeLine(out, "prefix", conf.getPrefix());
    writeLine(out, "suffix", conf.getSuffix());

    String postData = conf.getPostData();
    boolean hasBody = postData != null && postData.length() > 0;
    if (hasBody) {
      // Count encoded bytes, not chars: they differ for non-ASCII content.
      out.write("content-length " + postData.getBytes(CHARSET).length + EOL);
    }
    if (conf.isKeepOrigin()) {
      out.write("keep-origin" + EOL);
    }
    out.write(EOL);
    if (hasBody) {
      out.write(postData);
    }
  }

  /** Writes {@code "<command> <value>"} followed by the line terminator; does nothing for a null value. */
  private static void writeLine(BufferedWriter out, String command, String value)
      throws IOException {
    if (value != null) {
      out.write(command + " " + value + EOL);
    }
  }

  /** Returns the index of the first space at or after {@code from}, or the string length. */
  private static int skipToken(String s, int from) {
    int i = from;
    while (i < s.length() && s.charAt(i) != ' ') {
      i++;
    }
    return i;
  }

  /** Returns the index of the first non-space at or after {@code from}, or the string length. */
  private static int skipSpaces(String s, int from) {
    int i = from;
    while (i < s.length() && s.charAt(i) == ' ') {
      i++;
    }
    return i;
  }

  /**
   * Lists the files in {@code directory} whose name starts with {@code prefix} and
   * returns, sorted, what follows the prefix and its one-character separator.
   * Returns an empty array when the directory is null or cannot be listed.
   */
  private static String[] listSuffixes(String directory, final String prefix) {
    if (directory == null) {
      return new String[0];
    }
    File[] matches = new File(directory).listFiles(new FilenameFilter() {
      @Override
      public boolean accept(File dir, String name) {
        // Require at least the separator after the prefix so the substring below is valid.
        return name.length() > prefix.length() && name.startsWith(prefix);
      }
    });
    if (matches == null) {
      // listFiles returns null when the path is not a readable directory.
      return new String[0];
    }
    String[] suffixes = new String[matches.length];
    for (int k = 0; k < matches.length; k++) {
      suffixes[k] = matches[k].getName().substring(prefix.length() + 1);
    }
    Arrays.sort(suffixes);
    return suffixes;
  }
}
package com.zhsoft88.commons.tests; import com.zhsoft88.commons.Seacat; import com.zhsoft88.commons.Seacat.SeacatConf; import com.zhsoft88.commons.Seacat.SeacatResult; /** * seacat tester * @author zhsoft88 * * @since 2008-12-13 */ public class TestSeacat { /** * @param args */ public static void main(String[] args) throws Exception { Seacat seacat = new Seacat(); { // get snapshot by id SeacatConf conf = new SeacatConf(); conf.setUrl("http://www.google.com"); conf.setViewportId("gbar"); conf.setThumbSizes("160x120"); conf.setKeepOrigin(true); SeacatResult result = seacat.capture(conf); result.dump(); } { // get snapshot by class SeacatConf conf = new SeacatConf(); conf.setUrl("http://news.google.com"); conf.setViewportClass("lh"); conf.setThumbSizes("160x120"); conf.setEnableFull(true); conf.setKeepOrigin(true); SeacatResult result = seacat.capture(conf); result.dump(); } { // get snapshot by xpath SeacatConf conf = new SeacatConf(); conf.setUrl("http://news.google.com"); conf.setViewportXpath("//*[@id='topSection']"); conf.setThumbSizes("160x120"); conf.setKeepOrigin(true); SeacatResult result = seacat.capture(conf); result.dump(); } { // get snapshot by specific region SeacatConf conf = new SeacatConf(); conf.setUrl("http://news.google.com"); conf.setViewportRect("200 200 400 300"); conf.setThumbSizes("160x120"); conf.setKeepOrigin(true); SeacatResult result = seacat.capture(conf); result.dump(); } } }