# this file accepts multiple criteria types
# urls matching a criteria line are blocked from being inserted into spiderdb
# one criterion per line
#
# types:
#   - domain
#   - file
#   - host
#   - hostsuffix
#   - param
#   - path
#   - regex (pcre)
#   - tld
#
# domain example
# ==============
# block the example.com domain and all of its subdomains
#   blocks: www.example.com
#   blocks: sub.example.com
#   blocks: example.com
domain	example.com

# block all of example.com except empty subdomain & www
#   blocks: sub.example.com
#   allows: example.com
#   allows: www.example.com
domain	example.com		AND		NOT	subdomain	,www

# block all of example.com except root pages on the empty subdomain & www
#   blocks: example.com/dir/file.html
#   allows: example.com/file.html
#   allows: www.example.com/index.html
domain	example.com		AND		NOT subdomain		,www
domain	example.com		AND		NOT	pathcriteria	rootpages

# block all of example.com except the index page on the empty subdomain & www
#   blocks: example.com/file.html
#   blocks: example.com/index.html
#   allows: example.com/?param=value
#   allows: www.example.com/
domain	example.com		AND		NOT subdomain		,www
domain	example.com		AND		NOT	pathcriteria	indexpage

# file example
# ============
# block file (exact match)
#   blocks: www.example.com/wp-login.php
#   blocks: www.example.com/blog/wp-login.php
#   allows: www.example.com/blog/wp-login.php5
#   allows: www.example.com/blog/nwp-login.php
file	wp-login.php

# host example
# ============
# block specific host
#   blocks: www.example.com
#   allows: sub.example.com
host	www.example.com

# block specific host with specific port
#   blocks: www.example.com:3001
#   allows: www.example.com
#   allows: www.example.com:3002
host	www.example.com		AND	port	3001

# block specific host with path (prefix)
#   blocks: www.example.com/path/abc
#   blocks: www.example.com/path/def
#   allows: www.example.com/
#   allows: www.example.com/some/
host	www.example.com		AND	path	/path/

# block host with suffix
#   blocks: sub2.sub1.example.com
#   blocks: sub1.example.com
#   allows: www.example.com
#   allows: notsub1.example.com
host	sub1.example.com	matchsuffix

# param example
# =============
# block specific param (case insensitive)
#   blocks: www.example.com/file.html?good=abc&bad=123
#   blocks: www.example.com/?bad=123
#   allows: www.example.com/?good=abc
param	bad

# block specific param (case insensitive) with specific value (case sensitive)
#   blocks: www.example.com/file.html?good=abc&bad=xyz
#   blocks: www.example.com/file.html?good=abc&BAD=xyz
#   allows: www.example.com/file.html?good=abc&bad=abc
#   allows: www.example.com/file.html?good=abc&bad=XYZ
#   allows: www.example.com/file.html?good=abc&bad=xyz1
param	bad	xyz

# path example
# ============
# block specific path (prefix)
#   blocks: www.example.com/wp-admin
#   blocks: www.example.com/wp-admin/login
#   allows: www.example.com/en/wp-admin
path	/wp-admin	matchprefix

# block partial path
#   blocks: www.example.com/badpath
#   blocks: www.example.com/en/badpath
#   blocks: www.example.com/badpath/subpath
path	/badpath	matchpartial

# regex example
# =============
# block urls matching a regex
regex	https?://example\.com/(a|b)\.html

# block urls matching a regex (restricted to a specific domain)
domain	example.com		AND	regex https?://example\.com/(a|b)\.html

# tld example
# ===========
# block entire country-specific tlds (comma separated list)
#   blocks: www.example.my
#   blocks: www.example.com.my
#   blocks: www.example.sg
tld	my,sg