Extract all URLs during standard page scan.

The extracted URLs are currently only logged; they are not yet pulled through into reporting.
This commit is contained in:
Sarah Jamie Lewis 2016-04-12 22:03:39 -07:00
parent d059be932a
commit a0ae46ca31
3 changed files with 24 additions and 0 deletions

View File

@ -8,6 +8,7 @@ deanonymize.
* h12.me/socks - For the Tor SOCKS Proxy connection.
* github.com/xiam/exif - For EXIF data extraction.
* github.com/mvdan/xurls - For some URL parsing.
## OS Package Dependencies

View File

@ -2,8 +2,10 @@ package scans
import (
"github.com/s-rah/onionscan/report"
"github.com/s-rah/onionscan/utils"
"log"
"regexp"
"strings"
)
func StandardPageScan(scan Scanner, page string, status int, contents string, report *report.OnionScanReport) {
@ -11,6 +13,20 @@ func StandardPageScan(scan Scanner, page string, status int, contents string, re
if status == 200 {
log.Printf("\tPage %s%s is Accessible\n", report.HiddenService, page)
domains := utils.ExtractDomains(contents)
for _,domain := range domains {
if !strings.HasPrefix(domain, "http://"+report.HiddenService) {
log.Printf("Found Related URL %s\n", domain)
// TODO: Lots of information here which needs to be processed.
// * Links to standard sites - google / bitpay etc.
// * Links to other onion sites
// * Links to obscure clearnet sites.
} else {
// * Process Internal links
}
}
log.Printf("\tScanning for Images\n")
r := regexp.MustCompile("src=\"(" + "http://" + report.HiddenService + "/)?((.*?\\.jpg)|(.*?\\.png)|(.*?\\.jpeg)|(.*?\\.gif))\"")
foundImages := r.FindAllStringSubmatch(string(contents), -1)

7
utils/useful_regexps.go Normal file
View File

@ -0,0 +1,7 @@
package utils
import "github.com/mvdan/xurls"
// ExtractDomains returns every substring of content matched by xurls.Strict.
//
// Despite the name, the results are whatever xurls.Strict matches, not bare
// domain names. NOTE(review): xurls.Strict is presumably a compiled regexp
// that matches URLs with an explicit scheme — confirm against the xurls
// version in use.
func ExtractDomains(content string) []string {
	// A limit of -1 asks for all matches rather than a bounded count.
	matches := xurls.Strict.FindAllString(content, -1)
	return matches
}