onionscan/scans/standard-page-scan.go

package scans

import (
	"crypto/sha1"
	"encoding/hex"
	"github.com/s-rah/onionscan/report"
	"github.com/s-rah/onionscan/utils"
	"log"
	"net/url"
	"regexp"
	"strings"
)

// pageScanPathRegexp captures the value of every double-quoted src or href
// attribute. The pattern does not depend on the service being scanned, so it
// is compiled once rather than on every call.
var pageScanPathRegexp = regexp.MustCompile("(src|href)=\"([^\"]*)\"")

// StandardPageScan inspects a fetched page of the hidden service named by
// report.HiddenService and records what it finds on report.
//
// For a 200 response it:
//   - appends the hex-encoded SHA-1 of contents to report.Hashes and stores
//     contents as report.Snapshot;
//   - passes the host of every URL returned by utils.ExtractDomains that does
//     not start with "http://<HiddenService>" to report.AddLinkedSite;
//   - calls scan.ScanPage with CheckExif for every jpg/png/jpeg/gif found in a
//     src attribute;
//   - passes the directory part of every non-"http" src/href value to
//     report.AddPageReferencedDirectory.
//
// For 403 and 404 it only logs the outcome; any other status is ignored after
// the initial log line.
func StandardPageScan(scan Scanner, page string, status int, contents string, report *report.OnionScanReport) {
	log.Printf("Scanning %s%s\n", report.HiddenService, page)

	if status != 200 {
		switch status {
		case 403:
			log.Printf("\tPage %s%s is Forbidden\n", report.HiddenService, page)
		case 404:
			log.Printf("\tPage %s%s Does Not Exist\n", report.HiddenService, page)
		}
		return
	}

	log.Printf("\tPage %s%s is Accessible\n", report.HiddenService, page)

	hash := sha1.Sum([]byte(contents))
	report.Hashes = append(report.Hashes, hex.EncodeToString(hash[:]))
	report.Snapshot = contents

	internalPrefix := "http://" + report.HiddenService
	for _, domain := range utils.ExtractDomains(contents) {
		if strings.HasPrefix(domain, internalPrefix) {
			// * Process FQDN internal links (unlikely)
			log.Printf("Found Internal URL %s\n", domain)
			continue
		}

		log.Printf("Found Related URL %s\n", domain)
		// TODO: Lots of information here which needs to be processed.
		// * Links to standard sites - google / bitpay etc.
		// * Links to other onion sites
		// * Links to obscure clearnet sites.
		baseURL, err := url.Parse(domain)
		if err != nil {
			// url.Parse returns a nil *URL on failure; skip the entry
			// instead of dereferencing it.
			log.Printf("\t Could not parse URL %s: %v\n", domain, err)
			continue
		}
		report.AddLinkedSite(baseURL.Host)
	}

	log.Printf("\tScanning for Images\n")
	// The service prefix is quoted so that the dots in the hostname match
	// literally, and the path is restricted to non-quote characters so a match
	// cannot run past the end of the attribute into a later one.
	// NOTE(review): a src pointing at a different host is still captured in
	// full and requested below as a path on this service - confirm intent.
	imagePattern := regexp.MustCompile("src=\"(" + regexp.QuoteMeta(internalPrefix+"/") + ")?([^\"]*\\.(?:jpg|png|jpeg|gif))\"")
	for _, image := range imagePattern.FindAllStringSubmatch(contents, -1) {
		// image[2] is the path with any "http://<HiddenService>/" prefix removed.
		log.Printf("\t Found image %s\n", image[2])
		scan.ScanPage(report.HiddenService, "/"+image[2], report, CheckExif)
	}

	log.Printf("\tScanning for Referenced Directories\n")
	for _, match := range pageScanPathRegexp.FindAllStringSubmatch(contents, -1) {
		path := match[2]
		if strings.HasPrefix(path, "http") {
			continue
		}

		// Keep everything before the final slash; a path whose only slash is
		// the leading one (or which has none) names no directory.
		if term := strings.LastIndex(path, "/"); term > 0 {
			log.Printf("\t Found Referenced Directory %s\n", path[:term])
			report.AddPageReferencedDirectory(path[:term])
		}
	}
}
Initial Commit 2016-04-10 00:04:22 +00:00			`package scans`

			`import (`
Pull referenced directories from page scan and scan them along with common directories in http scanner 2016-04-24 17:28:59 +00:00			`"crypto/sha1"`
			`"encoding/hex"`
Initial Commit 2016-04-10 00:04:22 +00:00			`"github.com/s-rah/onionscan/report"`
Extract all URLs during standard page scan. Currently not pulled through into reporting. 2016-04-13 05:03:39 +00:00			`"github.com/s-rah/onionscan/utils"`
Initial Commit 2016-04-10 00:04:22 +00:00			`"log"`
Pull referenced directories from page scan and scan them along with common directories in http scanner 2016-04-24 17:28:59 +00:00			`"net/url"`
Initial Commit 2016-04-10 00:04:22 +00:00			`"regexp"`
Extract all URLs during standard page scan. Currently not pulled through into reporting. 2016-04-13 05:03:39 +00:00			`"strings"`
Initial Commit 2016-04-10 00:04:22 +00:00			`)`

			`func StandardPageScan(scan Scanner, page string, status int, contents string, report *report.OnionScanReport) {`
			`log.Printf("Scanning %s%s\n", report.HiddenService, page)`
			`if status == 200 {`
			`log.Printf("\tPage %s%s is Accessible\n", report.HiddenService, page)`
Pull referenced directories from page scan and scan them along with common directories in http scanner 2016-04-24 17:28:59 +00:00
New Protocols Scans, SSH Fingerprinting * SSH Fingerprint * Page Snapshot * A few new Protocol Tests (FTP, SMTP, Ricochet, IRC) 2016-04-25 02:46:28 +00:00			`hash := sha1.Sum([]byte(contents))`
			`report.Hashes = append(report.Hashes, hex.EncodeToString(hash[:]))`
			`report.Snapshot = contents`
Initial Commit 2016-04-10 00:04:22 +00:00
Extract all URLs during standard page scan. Currently not pulled through into reporting. 2016-04-13 05:03:39 +00:00			`domains := utils.ExtractDomains(contents)`
Pull referenced directories from page scan and scan them along with common directories in http scanner 2016-04-24 17:28:59 +00:00
			`for _, domain := range domains {`
Extract all URLs during standard page scan. Currently not pulled through into reporting. 2016-04-13 05:03:39 +00:00			`if !strings.HasPrefix(domain, "http://"+report.HiddenService) {`
			`log.Printf("Found Related URL %s\n", domain)`
			`// TODO: Lots of information here which needs to be processed.`
			`// * Links to standard sites - google / bitpay etc.`
			`// * Links to other onion sites`
			`// * Links to obscure clearnet sites.`
Pull referenced directories from page scan and scan them along with common directories in http scanner 2016-04-24 17:28:59 +00:00			`baseUrl, _ := url.Parse(domain)`
New Protocols Scans, SSH Fingerprinting * SSH Fingerprint * Page Snapshot * A few new Protocol Tests (FTP, SMTP, Ricochet, IRC) 2016-04-25 02:46:28 +00:00			`report.AddLinkedSite(baseUrl.Host)`
Extract all URLs during standard page scan. Currently not pulled through into reporting. 2016-04-13 05:03:39 +00:00			`} else {`
Pull referenced directories from page scan and scan them along with common directories in http scanner 2016-04-24 17:28:59 +00:00			`// * Process FQDN internal links (unlikly)`
New Protocols Scans, SSH Fingerprinting * SSH Fingerprint * Page Snapshot * A few new Protocol Tests (FTP, SMTP, Ricochet, IRC) 2016-04-25 02:46:28 +00:00			`log.Printf("Found Internal URL %s\n", domain)`
Extract all URLs during standard page scan. Currently not pulled through into reporting. 2016-04-13 05:03:39 +00:00			`}`
Pull referenced directories from page scan and scan them along with common directories in http scanner 2016-04-24 17:28:59 +00:00			`}`
Extract all URLs during standard page scan. Currently not pulled through into reporting. 2016-04-13 05:03:39 +00:00
Initial Commit 2016-04-10 00:04:22 +00:00			`log.Printf("\tScanning for Images\n")`
			`r := regexp.MustCompile("src=\"(" + "http://" + report.HiddenService + "/)?((.?\\.jpg)\|(.?\\.png)\|(.?\\.jpeg)\|(.?\\.gif))\"")`
			`foundImages := r.FindAllStringSubmatch(string(contents), -1)`
			`for _, image := range foundImages {`
			`log.Printf("\t Found image %s\n", image[2])`
			`scan.ScanPage(report.HiddenService, "/"+image[2], report, CheckExif)`
			`}`
Pull referenced directories from page scan and scan them along with common directories in http scanner 2016-04-24 17:28:59 +00:00
			`log.Printf("\tScanning for Referenced Directories\n")`
			`r = regexp.MustCompile("(src\|href)=\"([^\"]*)\"")`
			`foundPaths := r.FindAllStringSubmatch(string(contents), -1)`
			`for _, regexpResults := range foundPaths {`
			`path := regexpResults[2]`
			`if strings.HasPrefix(path, "http") {`
			`continue`
			`}`

			`term := strings.LastIndex(path, "/")`
			`if term > 0 {`
			`log.Printf("\t Found Referenced Directory %s\n", path[:term])`
			`report.AddPageReferencedDirectory(path[:term])`
			`}`
			`}`
Initial Commit 2016-04-10 00:04:22 +00:00			`} else if status == 403 {`
			`log.Printf("\tPage %s%s is Forbidden\n", report.HiddenService, page)`
			`} else if status == 404 {`
			`log.Printf("\tPage %s%s is Does Not Exist\n", report.HiddenService, page)`
			`}`
			`}`