# Max number of task_threads running in parallel. Each thread fetches an ourl from
# ourl_queue, crawls the page, and spawns more threads according to cur_thread_num.
# Adjust this to suit your network.
max_job_num=4
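# On a faster connection you might raise the thread count; the value below is
# purely illustrative.
#max_job_num=8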
# The URLs to start the job from.
# Comma-separated if you have more than one seed.
#seeds=http://www.imeiding.com
seeds=http://www.cnblogs.com/gaorong
#seeds=http://hi.baidu.com/qteqpid_pku
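# A hypothetical multi-seed example (the hosts below are placeholders):
#seeds=http://www.example.com,http://www.example.org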
# If include_prefixes is set, only the URLs that match it are crawled
#include_prefixes=hi.baidu.com/qteqpid_pku/item
# If exclude_prefixes is set, the URLs that match it will NOT be crawled
#exclude_prefixes=www.imeiding.com/user
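# Hypothetical example: stay within the seed blog while skipping one of its
# sub-paths (the admin path below is invented for illustration):
#include_prefixes=www.cnblogs.com/gaorong
#exclude_prefixes=www.cnblogs.com/gaorong/admin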
# When daemonized, the process's output is written to the logfile rather than to the console
logfile=spiderq.log
# Set the log level. The possible values are listed below:
# 0 DEBUG
# 1 INFO
# 2 WARN
# 3 ERROR
# 4 CRIT
# The spider only logs messages whose level is greater than or equal to log_level.
# That means if you set log_level to 0, you will get all logs.
log_level=0
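# For example, to keep only warnings and errors (illustrative, based on the
# levels listed above):
#log_level=2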
# How deep to crawl from the seeds. If 0, only the seeds are crawled before exiting.
# Comment out the following line if you want to go as deep as possible.
max_depth=3
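# For example, to crawl nothing beyond the seed pages themselves (per the note above):
#max_depth=0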
# The interval (in seconds) at which to print stat data.
# If you need it, uncomment the following line.
#stat_interval=2
# How to save the crawled pages. "yes" means respect each site's directory hierarchy.
# NOT supported yet!
#make_hostdir=yes
# Dynamic Shared Object (DSO) Support
# The path where modules (.so) are located.
module_path=./modules/
# Which modules to load, one per line.
# The source code for the available modules is in the modules directory.
# They are all compiled to .so and copied to ${module_path} during make.
#   pre_surl:     domainlimit, maxdepth
#   after_header: headerfilter
#   after_resp:   savehtml, saveimage
load_module=savehtml
load_module=saveimage
load_module=maxdepth
load_module=domainlimit
load_module=headerfilter
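# If you only want HTML pages and no images, a minimal module set might look
# like the following (illustrative sketch; see the hook list above):
#load_module=savehtml
#load_module=maxdepth
#load_module=domainlimit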
# Specify which types of resource to accept, one per line.
# text/html is accepted by default.
accept_types=image/jpeg
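# To also accept PNG images, a hypothetical additional line (assuming the image
# modules handle that type) would be:
#accept_types=image/png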