Skip to content

Commit

Permalink
v0.007 配置信息动态读写实现
Browse files Browse the repository at this point in the history
  • Loading branch information
yihui committed Jul 27, 2017
1 parent 0732630 commit 7c40c0a
Show file tree
Hide file tree
Showing 10 changed files with 393 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package com.quick.hui.crawler.common;


import java.io.*;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * Static helpers that open a named resource for reading.
 * <p>
 * A name is resolved in one of three ways:
 * <ul>
 * <li>names starting with {@code http://} or {@code https://} are opened as URLs;</li>
 * <li>names starting with {@code /} are treated as absolute file-system paths;</li>
 * <li>every other name is looked up on the classpath through this class's class loader.</li>
 * </ul>
 * Every stream or reader returned here is owned by the caller, who must close it.
 * <p>
 * Created by yihui on 2017/5/6.
 */
public class FileReadUtil {

    /** Charset used by all character-based readers created here. */
    private static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;

    private static final String HTTP_PREFIX = "http://";
    private static final String HTTPS_PREFIX = "https://";
    private static final String ABSOLUTE_PREFIX = "/";

    /**
     * Opens the resource byte by byte; commonly used for binary files such as
     * images, audio or video.
     *
     * @param fileName name of the resource, resolved as described on the class
     * @return an open stream over the resource; never {@code null}
     * @throws IOException              if the resource is missing or cannot be opened
     * @throws IllegalArgumentException if {@code fileName} is {@code null}
     */
    public static InputStream createByteRead(String fileName) throws IOException {
        return getStreamByFileName(fileName);
    }


    /**
     * Opens the resource character by character (decoded as UTF-8); commonly used
     * for text or numeric files.
     *
     * @param fileName name of the resource, resolved as described on the class
     * @return a UTF-8 reader over the resource
     * @throws IOException              if the resource is missing or cannot be opened
     * @throws IllegalArgumentException if {@code fileName} is {@code null}
     */
    public static Reader createCharRead(String fileName) throws IOException {
        return new InputStreamReader(getStreamByFileName(fileName), DEFAULT_CHARSET);
    }


    /**
     * Opens the resource for line-oriented reading (decoded as UTF-8); commonly
     * used for line-formatted files.
     *
     * @param fileName name of the resource, resolved as described on the class
     * @return a buffered UTF-8 reader over the resource
     * @throws IOException              if the resource is missing or cannot be opened
     * @throws IllegalArgumentException if {@code fileName} is {@code null}
     */
    public static BufferedReader createLineRead(String fileName) throws IOException {
        return new BufferedReader(createCharRead(fileName));
    }


    /**
     * Resolves {@code fileName} and opens an input stream over it.
     *
     * @param fileName an http(s) URL, an absolute path, or a classpath-relative name
     * @return an open stream; never {@code null}
     * @throws FileNotFoundException    if a classpath-relative name cannot be found
     * @throws IOException              if the URL or file cannot be opened
     * @throws IllegalArgumentException if {@code fileName} is {@code null}
     */
    public static InputStream getStreamByFileName(String fileName) throws IOException {
        check(fileName);

        if (isHttpUrl(fileName)) { // network address
            return new URL(fileName).openStream();
        }
        if (fileName.startsWith(ABSOLUTE_PREFIX)) { // absolute path
            return Files.newInputStream(Paths.get(fileName));
        }

        // relative path: resolved against the classpath
        InputStream stream = FileReadUtil.class.getClassLoader().getResourceAsStream(fileName);
        if (stream == null) {
            // getResourceAsStream signals "not found" with null; fail here with a clear
            // message instead of letting callers hit a NullPointerException later.
            throw new FileNotFoundException("System do not have this file : " + fileName);
        }
        return stream;
    }


    /**
     * Resolves {@code fileName} to a {@link File} handle without opening it.
     * <p>
     * For an http(s) URL the returned file is built from the URL's file part
     * (path plus query), i.e. it refers to a local path of that name.
     *
     * @param fileName an http(s) URL, an absolute path, or a classpath-relative name
     * @return the resolved file handle (its existence is not checked for URLs and absolute paths)
     * @throws IOException              if {@code fileName} is a malformed URL
     * @throws IllegalArgumentException if {@code fileName} is {@code null} or a
     *                                  classpath-relative name cannot be found
     */
    public static File getFile(String fileName) throws IOException {
        check(fileName);

        if (isHttpUrl(fileName)) { // network address
            return new File(new URL(fileName).getFile());
        }
        if (fileName.startsWith(ABSOLUTE_PREFIX)) { // absolute path
            return new File(fileName);
        }

        // relative path: resolved against the classpath
        URL url = FileReadUtil.class.getClassLoader().getResource(fileName);
        check(url, "System do not have this file : " + fileName);
        return toFile(url);
    }


    /**
     * Tells whether the name is an http or https URL. Requiring the full scheme
     * prefix keeps relative names such as {@code httpd.conf} on the classpath branch.
     */
    private static boolean isHttpUrl(String fileName) {
        return fileName.startsWith(HTTP_PREFIX) || fileName.startsWith(HTTPS_PREFIX);
    }


    /**
     * Converts a resource URL to a file. {@code file:} URLs go through their URI so
     * percent-encoded characters (for example {@code %20} for a space) are decoded;
     * anything else falls back to the raw file part of the URL.
     */
    private static File toFile(URL url) {
        if ("file".equals(url.getProtocol())) {
            try {
                return new File(url.toURI());
            } catch (URISyntaxException | IllegalArgumentException ignored) {
                // Not convertible through a URI; fall back to the raw file part below.
            }
        }
        return new File(url.getFile());
    }


    /** Rejects a {@code null} argument with the default message. */
    private static void check(Object arg) {
        check(arg, "params should not be null!");
    }

    /** Rejects a {@code null} argument with the given message. */
    private static void check(Object arg, String msg) {
        if (arg == null) {
            throw new IllegalArgumentException(msg);
        }
    }

}
15 changes: 15 additions & 0 deletions core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,21 @@
<artifactId>logback-classic</artifactId>
<version>1.1.7</version>
</dependency>
<dependency>
<groupId>com.quick.hui.crawler</groupId>
<artifactId>common</artifactId>
<version>1.0-SNAPSHOT</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>21.0</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.5</version>
</dependency>


</dependencies>
Expand Down
35 changes: 35 additions & 0 deletions core/src/main/java/com/quick/hui/crawler/core/conf/Config.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
package com.quick.hui.crawler.core.conf;

import com.quick.hui.crawler.core.utils.NumUtils;
import lombok.Getter;
import lombok.Setter;
import lombok.ToString;

/**
 * Holder for the crawler's tunable settings. Plain getters, setters and
 * {@code toString} are generated by Lombok; the two-argument setters below
 * additionally accept the raw textual value together with a fallback.
 * <p>
 * Created by yihui on 2017/7/8.
 */
@Getter
@Setter
@ToString
public class Config {

    /**
     * Interval between crawl tasks.
     */
    private long sleep;


    /**
     * How long to wait before retrying when fetching a task from the queue
     * returned nothing.
     */
    private long emptyQueueWaitTime;


    /**
     * Sets {@link #sleep} from its textual form.
     *
     * @param str   raw value to convert
     * @param sleep fallback handed to {@code NumUtils.str2long}
     *              (presumably used when {@code str} cannot be parsed; confirm against NumUtils)
     */
    public void setSleep(String str, long sleep) {
        final long resolved = NumUtils.str2long(str, sleep);
        this.sleep = resolved;
    }

    /**
     * Sets {@link #emptyQueueWaitTime} from its textual form.
     *
     * @param str                raw value to convert
     * @param emptyQueueWaitTime fallback handed to {@code NumUtils.str2long}
     *                           (presumably used when {@code str} cannot be parsed; confirm against NumUtils)
     */
    public void setEmptyQueueWaitTime(String str, long emptyQueueWaitTime) {
        final long resolved = NumUtils.str2long(str, emptyQueueWaitTime);
        this.emptyQueueWaitTime = resolved;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package com.quick.hui.crawler.core.conf;

import com.google.common.eventbus.EventBus;
import com.google.common.eventbus.Subscribe;
import com.quick.hui.crawler.core.conf.file.FileConfRead;
import lombok.Getter;
import lombok.Setter;
import lombok.extern.slf4j.Slf4j;


/**
 * Lazily created singleton that holds the current {@link Config} and replaces it
 * whenever an {@link UpdateConfEvent} is posted through {@link #post(Object)}.
 * <p>
 * Created by yihui on 2017/7/8.
 */
@Slf4j
public class ConfigWrapper {
    private static final String CONFIG_PATH = "conf/crawler.properties";

    private static volatile ConfigWrapper instance;

    /** Bus on which this wrapper listens for {@link UpdateConfEvent}s. */
    private final EventBus eventBus;

    /** Source the configuration is (re)loaded from. */
    private final IConfRead confRead;

    /**
     * Current configuration. Volatile because {@link #init(UpdateConfEvent)} replaces
     * it on whichever thread posts the event, while other threads read it through
     * {@link #getConfig()}.
     */
    private volatile Config config;

    private ConfigWrapper() {
        // Create and register the listener first so the bus exists before any
        // update-check task has been registered.
        eventBus = new EventBus();
        eventBus.register(this);

        confRead = new FileConfRead();
        // Register the check task before the initial load, as the original did.
        confRead.registerCheckTask(CONFIG_PATH);
        config = confRead.initConf(CONFIG_PATH);
    }


    /**
     * Returns the shared instance, creating it on first use.
     *
     * @return the single {@code ConfigWrapper}
     */
    public static ConfigWrapper getInstance() {
        ConfigWrapper current = instance;
        if (current == null) {
            synchronized (ConfigWrapper.class) {
                current = instance;
                if (current == null) {
                    current = new ConfigWrapper();
                    instance = current;
                }
            }
        }

        return current;
    }


    /**
     * Event-bus subscriber: reloads the configuration named by the event and
     * makes it the current one.
     *
     * @param event update notification carrying the configuration location
     */
    @Subscribe
    public void init(UpdateConfEvent event) {
        Config reloaded = confRead.initConf(event.conf);
        config = reloaded;

        if (log.isDebugEnabled()) {
            log.debug("time:{} processor:{} update config! new config is: {}",
                    event.now, event.operator, reloaded);
        }
    }


    /**
     * @return the configuration currently in effect
     */
    public Config getConfig() {
        return config;
    }


    /**
     * Publishes an event on the internal bus; posting an {@link UpdateConfEvent}
     * triggers {@link #init(UpdateConfEvent)}.
     *
     * @param event the event to publish
     */
    public void post(Object event) {
        eventBus.post(event);
    }

    /**
     * Notification that the configuration should be reloaded. All fields have
     * defaults: the creation time, the operator {@code "System"} and the default
     * configuration path.
     */
    @Getter
    @Setter
    public static class UpdateConfEvent {
        private long now = System.currentTimeMillis();

        private String operator = "System";

        private String conf = CONFIG_PATH;
    }
}
25 changes: 25 additions & 0 deletions core/src/main/java/com/quick/hui/crawler/core/conf/IConfRead.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package com.quick.hui.crawler.core.conf;

/**
* Interface for reading the configuration.
* <p>
* Created by yihui on 2017/7/8.
*/
public interface IConfRead {

/**
* Initializes the configuration information.
*
* @param var identifies the configuration to read; this interface does not
* constrain its format (NOTE(review): presumably a location such as a file path,
* confirm per implementation)
* @return the initialized {@link Config}
*/
Config initConf(String var);


/**
* Registers the task that checks for configuration updates.
*
* @param path the configuration location the check task applies to
*/
void registerCheckTask(final String path);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
package com.quick.hui.crawler.core.conf.file;

import com.quick.hui.crawler.common.FileReadUtil;
import com.quick.hui.crawler.core.conf.Config;
import com.quick.hui.crawler.core.conf.ConfigWrapper;
import com.quick.hui.crawler.core.conf.IConfRead;
import lombok.extern.slf4j.Slf4j;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

/**
* 从配置文件中获取配置信息
* <p>
* Created by yihui on 2017/7/8.
*/
@Slf4j
public class FileConfRead implements IConfRead {


public Config initConf(String path) {
try {
Properties properties = read(path);

Config config = new Config();
config.setSleep(properties.getProperty("sleep"), 0);
config.setEmptyQueueWaitTime(properties.getProperty("emptyQueueWaitTime"), 200);

return config;
} catch (Exception e) {
log.error("init config from file: {} error! e: {}", path, e);
return new Config();
}
}


private Properties read(String fileName) throws IOException {
try (InputStream inputStream = FileReadUtil.getStreamByFileName(fileName)) {
Properties pro = new Properties();
pro.load(inputStream);
return pro;
}
}


private File file;
private long lastTime;

public void registerCheckTask(final String path) {
try {
file = FileReadUtil.getFile(path);
lastTime = file.lastModified();


ScheduledExecutorService scheduledExecutorService = Executors.newScheduledThreadPool(1);
scheduledExecutorService.scheduleAtFixedRate(() -> {
if (file.lastModified() > lastTime) {
lastTime = file.lastModified();
ConfigWrapper.getInstance().post(new ConfigWrapper.UpdateConfEvent());
}
},
1,
1,
TimeUnit.MINUTES);
} catch (Exception e) {
throw new RuntimeException(e);
}
}

}
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.quick.hui.crawler.core.fetcher;

import com.quick.hui.crawler.core.conf.ConfigWrapper;
import com.quick.hui.crawler.core.entity.CrawlMeta;
import com.quick.hui.crawler.core.job.DefaultAbstractCrawlJob;
import lombok.*;
Expand Down Expand Up @@ -74,11 +75,23 @@ public <T extends DefaultAbstractCrawlJob> void start(Class<T> clz) throws Excep
while (!fetchQueue.isOver) {
crawlMeta = fetchQueue.pollSeed();
if (crawlMeta == null) {
Thread.sleep(200);
Thread.sleep(ConfigWrapper.getInstance().getConfig().getEmptyQueueWaitTime());
continue;
}


try {
long sleep = ConfigWrapper.getInstance().getConfig().getSleep();
Thread.sleep(sleep);

if (log.isDebugEnabled()) {
log.debug("Sleep {} ms", sleep);
}
} catch (Exception e) {
log.error("fetcher sleep exception! e:{} ", e);
}


DefaultAbstractCrawlJob job = clz.newInstance();
job.setDepth(this.maxDepth);
job.setCrawlMeta(crawlMeta);
Expand Down
Loading

0 comments on commit 7c40c0a

Please sign in to comment.