# /robots.txt file for http://www.6gl.com
#
# Format reminder: one directive per line, groups separated by a blank line.
# A crawler obeys only the most specific group that matches its user-agent;
# it does NOT additionally inherit the rules of the "User-agent: *" group.

# Examples...
#
# This allows a crawler to access everywhere
#User-agent: mr-webcrawler
#Disallow:
#
# This allows a crawler no access, as all URLs on a server start with '/' which is all of them
#User-agent: mr-webcrawler
#Disallow: /
#
# This stops all robots visiting URLs with /images or /hub; '*' means any other useragent
#User-agent: *
#Disallow: /images/
#Disallow: /hub/
#
# Other info from Wikipedia - http://en.wikipedia.org/wiki/Robots.txt
#Crawl-delay: 30 - in seconds to wait between successive requests to the same server
#Request-rate: 1/5 - maximum rate is one page every 5 seconds
#Visit-time: 0600-0845 - only visit between 6:00 AM and 8:45 AM UT (GMT)


# A L L O W E D

# Allow Google Bot everywhere (empty Disallow = nothing is blocked).
# NOTE(review): because Googlebot has its own group, it is not bound by the
# /admin, /payments, ... rules in the "*" group below - confirm this is intended.
# NOTE(review): Google documents Crawl-delay as unsupported; the line is kept
# for crawlers that identify as Googlebot and do honour it.
User-agent: Googlebot
Disallow:
Crawl-delay: 30


# D I S A L L O W E D

# Default rules for every robot without a more specific group below:
# keep out of admin/back-office areas, assets and the images folder.
# /404.asp was previously listed in a second "User-agent: *" group at the end
# of the file; it is merged here because some parsers only honour the first
# "*" group they encounter (/flash was already present in this group).
User-agent: *
Disallow: /admin
Disallow: /admincentre
Disallow: /archive
Disallow: /bin
Disallow: /email
Disallow: /flash
Disallow: /holding
Disallow: /payments
Disallow: /xml
Disallow: /xsl
Disallow: /images/
Disallow: /404.asp

# Disallow all spidering of images by psbot
User-agent: psbot
Disallow: /images/

# Disallow Google Image Bot
User-agent: Googlebot-Image
Disallow: /

# Disallow Twiceler Bot - Cuill
User-agent: twiceler
Disallow: /

# Disallow Exabot Bot - Exalead
User-agent: Exabot
Disallow: /

# Disallow Voilabot Bot - France Telecom
User-agent: VoilaBot
Disallow: /

# Disallow Yahoo Image Bot
# (the field name was doubled - "User-agent: User-agent: ..." - so the group never matched)
User-agent: Yahoo-MMCrawler
Disallow: /

# Disallow CazoodleBot - from University of Illinois
User-agent: CazoodleBot
Disallow: /

# Disallow semanticdiscovery - from Southern Utah University (Computer Science Dept.)
User-agent: semanticdiscovery
Disallow: /

# Disallow Baidu Bot (Japanese)
User-agent: Baiduspider
Disallow: /

# Disallow NimbleCrawler (http://www.webmasterworld.com/forum93/858.htm)
# (field name was misspelled "UserAgent:"; the standard name is "User-agent:")
User-agent: nimblecrawler
Disallow: /

# Disallow TurnITin - "This robot collects content from the Internet for the sole purpose of helping educational institutions prevent plagiarism"
User-agent: TurnitinBot
Disallow: /

# Disallow Seekbot - http://www.seekport.co.uk/seekbot/
User-agent: Seekbot
Disallow: /

# Disallow Sogou - Chinese Search Engine
User-agent: Sogou
Disallow: /

# Disallow Mirago.com (http://www.miragorobot.com)
# The URL used to be part of the User-agent value; a product token may only
# contain letters, '-' and '_', so the URL now lives in this comment.
# NOTE(review): token kept exactly as originally written - confirm it matches
# the name the Mirago crawler actually announces.
User-agent: Mirago-Test-Robot
Disallow: /

# Disallow Majestic12.co.uk
User-agent: MJ12bot
Disallow: /

# Disallow MSN from seeing gifs and jpgs
# ('*' and '$' are wildcard extensions: any path ending in the given suffix.)
# /*.jpg$ added: the original only listed .gif and .jpeg, so .jpg files were
# not covered despite the stated intent.
# NOTE(review): msnbot has its own group, so it is not bound by the "*" rules
# above (/admin, /payments, ...) - confirm this is intended.
User-agent: msnbot
Disallow: /*.gif$
Disallow: /*.jpg$
Disallow: /*.jpeg$


# O L D

# Not spidering ATM, but was in original robots.txt
#User-agent: Mediapartners-Google*
#Disallow: