124 lines
3.4 KiB
Plaintext
124 lines
3.4 KiB
Plaintext
# this file accepts multiple criteria types
|
|
# blocks url from being inserted into spiderdb
|
|
# one criterion per line
|
|
#
|
|
# types:
|
|
# - domain
|
|
# - file
|
|
# - host
|
|
# - hostsuffix
|
|
# - param
# - path
|
|
# - regex (pcre)
|
|
# - tld
|
|
#
|
|
# domain example
|
|
# ==============
|
|
# block all subdomains of the example.com domain (including the bare domain)
|
|
# blocks: www.example.com
|
|
# blocks: sub.example.com
|
|
# blocks: example.com
|
|
domain example.com
|
|
|
|
# block all of example.com except empty subdomain & www
|
|
# blocks: sub.example.com
|
|
# allows: example.com
|
|
# allows: www.example.com
|
|
domain example.com AND NOT subdomain ,www
|
|
|
|
# block all of example.com except empty subdomain & www root pages
|
|
# blocks: example.com/dir/file.html
|
|
# allows: example.com/file.html
|
|
# allows: www.example.com/index.html
|
|
domain example.com AND NOT subdomain ,www
|
|
domain example.com AND NOT pathcriteria rootpages
|
|
|
|
# block all of example.com except empty subdomain & www index page
|
|
# blocks: example.com/file.html
|
|
# blocks: example.com/index.html
|
|
# allows: example.com/?param=value
|
|
# allows: www.example.com/
|
|
domain example.com AND NOT subdomain ,www
|
|
domain example.com AND NOT pathcriteria indexpage
|
|
|
|
# file example
|
|
# ============
|
|
# block file (exact match)
|
|
# blocks: www.example.com/wp-login.php
|
|
# blocks: www.example.com/blog/wp-login.php
|
|
# allows: www.example.com/blog/wp-login.php5
|
|
# allows: www.example.com/blog/nwp-login.php
|
|
file wp-login.php
|
|
|
|
# host example
|
|
# ============
|
|
# block specific host
|
|
# blocks: www.example.com
|
|
# allows: sub.example.com
|
|
host www.example.com
|
|
|
|
# block specific host with specific port
|
|
# blocks: www.example.com:3001
|
|
# allows: www.example.com
|
|
# allows: www.example.com:3002
|
|
host www.example.com AND port 3001
|
|
|
|
# block specific host with path (prefix)
|
|
# blocks: www.example.com/path/abc
|
|
# blocks: www.example.com/path/def
|
|
# allows: www.example.com/
|
|
# allows: www.example.com/some/
|
|
host www.example.com AND path /path/
|
|
|
|
# block host with suffix
|
|
# blocks: sub2.sub1.example.com
|
|
# blocks: sub1.example.com
|
|
# allows: www.example.com
|
|
# allows: notsub1.example.com
|
|
host sub1.example.com matchsuffix
|
|
|
|
# param example
|
|
# =============
|
|
# block specific param (case insensitive)
|
|
# blocks: www.example.com/file.html?good=abc&bad=123
|
|
# blocks: www.example.com/?bad=123
|
|
# allows: www.example.com/?good=abc
|
|
param bad
|
|
|
|
# block specific param (case insensitive) with specific value (case sensitive)
|
|
# blocks: www.example.com/file.html?good=abc&bad=xyz
|
|
# blocks: www.example.com/file.html?good=abc&BAD=xyz
|
|
# allows: www.example.com/file.html?good=abc&bad=abc
|
|
# allows: www.example.com/file.html?good=abc&bad=XYZ
|
|
# allows: www.example.com/file.html?good=abc&bad=xyz1
|
|
param bad xyz
|
|
|
|
# path example
|
|
# ============
|
|
# block specific path (prefix)
|
|
# blocks: www.example.com/wp-admin
|
|
# blocks: www.example.com/wp-admin/login
|
|
# allows: www.example.com/en/wp-admin
|
|
path /wp-admin matchprefix
|
|
|
|
# block partial path
|
|
# blocks: www.example.com/badpath
|
|
# blocks: www.example.com/en/badpath
|
|
# blocks: www.example.com/badpath/subpath
|
|
path /badpath matchpartial
|
|
|
|
# regex example
|
|
# =============
|
|
# blocks url by regex
|
|
regex https?://example\.com/(a|b)\.html
|
|
|
|
# blocks url by regex (specific domain)
|
|
domain example.com AND regex https?://example\.com/(a|b)\.html
|
|
|
|
# tld example
|
|
# ===========
|
|
# block entire country-specific TLDs (comma-separated list)
|
|
# blocks: www.example.my
|
|
# blocks: www.example.com.my
|
|
# blocks: www.example.sg
|
|
tld my,sg
|