Extract all URLs during standard page scan.
Currently not pulled through into reporting.
commit a0ae46ca31 (parent d059be932a)
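The commit message notes that the extracted URLs are not yet pulled through into reporting. A minimal sketch of what that wiring could look like, using a hypothetical RelatedURLs field and AddRelatedURL helper that this commit does not add:

```go
package report

// Sketch only: of the fields shown here, only HiddenService is known to
// exist in this commit. RelatedURLs is a hypothetical addition.
type OnionScanReport struct {
	HiddenService string
	RelatedURLs   []string
}

// AddRelatedURL would record a URL found during a page scan so that it
// reaches the final report. Hypothetical helper, not part of this commit.
func (osr *OnionScanReport) AddRelatedURL(url string) {
	osr.RelatedURLs = append(osr.RelatedURLs, url)
}
```

StandardPageScan could then call report.AddRelatedURL(domain) at the point where it currently only logs the match.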
@@ -8,6 +8,7 @@ deanonymize.
 
 * h12.me/socks - For the Tor SOCKS Proxy connection.
 * github.com/xiam/exif - For EXIF data extraction.
+* github.com/mvdan/xurls - For some URL parsing.
 
 ## OS Package Dependencies
 
@@ -2,8 +2,10 @@ package scans
 
 import (
 	"github.com/s-rah/onionscan/report"
+	"github.com/s-rah/onionscan/utils"
 	"log"
 	"regexp"
+	"strings"
 )
 
 func StandardPageScan(scan Scanner, page string, status int, contents string, report *report.OnionScanReport) {
@@ -11,6 +13,20 @@ func StandardPageScan(scan Scanner, page string, status int, contents string, re
 	if status == 200 {
 		log.Printf("\tPage %s%s is Accessible\n", report.HiddenService, page)
 
+		domains := utils.ExtractDomains(contents)
+
+		for _, domain := range domains {
+			if !strings.HasPrefix(domain, "http://"+report.HiddenService) {
+				log.Printf("Found Related URL %s\n", domain)
+				// TODO: Lots of information here which needs to be processed.
+				// * Links to standard sites - google / bitpay etc.
+				// * Links to other onion sites
+				// * Links to obscure clearnet sites.
+			} else {
+				// * Process Internal links
+			}
+		}
+
 		log.Printf("\tScanning for Images\n")
 		r := regexp.MustCompile("src=\"(" + "http://" + report.HiddenService + "/)?((.*?\\.jpg)|(.*?\\.png)|(.*?\\.jpeg)|(.*?\\.gif))\"")
 		foundImages := r.FindAllStringSubmatch(string(contents), -1)
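The loop added above classifies every URL by a simple prefix test: anything that does not start with http:// followed by the scanned hidden service address is logged as a related URL, and everything else falls into the (currently empty) internal-link branch. A standalone sketch of that check, using a made-up onion address:

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// Made-up hidden service address, for illustration only.
	hiddenService := "examplev2address.onion"
	domains := []string{
		"http://examplev2address.onion/contact",
		"http://someotherhidden.onion/",
		"https://www.google.com/",
	}
	for _, domain := range domains {
		if !strings.HasPrefix(domain, "http://"+hiddenService) {
			fmt.Printf("Found Related URL %s\n", domain) // other onion or clearnet site
		} else {
			fmt.Printf("Internal link %s\n", domain) // same hidden service
		}
	}
}
```

Note that the prefix test treats an https:// link to the same hidden service as external, since only the http:// form is checked.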
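The image scan that follows builds its regular expression from the hidden service address, with an optional absolute http:// prefix so that both absolute and relative src attributes are caught. A quick check of what the submatches contain, again with a made-up address (capture group 2 holds the image path):

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	// Made-up hidden service address and page snippet, for illustration only.
	hiddenService := "examplev2address.onion"
	contents := `<img src="http://examplev2address.onion/header.jpg"> <img src="photos/cat.png">`

	r := regexp.MustCompile("src=\"(" + "http://" + hiddenService + "/)?((.*?\\.jpg)|(.*?\\.png)|(.*?\\.jpeg)|(.*?\\.gif))\"")
	for _, m := range r.FindAllStringSubmatch(contents, -1) {
		// m[0] is the whole src attribute, m[2] the captured image path.
		fmt.Println(m[2])
	}
	// Output:
	// header.jpg
	// photos/cat.png
}
```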
@@ -0,0 +1,7 @@
+package utils
+
+import "github.com/mvdan/xurls"
+
+func ExtractDomains(content string) []string {
+	return xurls.Strict.FindAllString(content, -1)
+}
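ExtractDomains delegates entirely to xurls; despite its name, it returns full URLs rather than bare domains. A standalone sketch of what the call yields, assuming the xurls API used in this diff, where xurls.Strict is a package-level *regexp.Regexp that only matches URLs carrying a scheme:

```go
package main

import (
	"fmt"

	"github.com/mvdan/xurls"
)

func main() {
	content := `See http://examplev2address.onion/about and https://github.com/s-rah/onionscan as well as onionscan.org`
	// Strict requires a scheme, so the bare onionscan.org is not matched.
	for _, url := range xurls.Strict.FindAllString(content, -1) {
		fmt.Println(url)
	}
	// Output:
	// http://examplev2address.onion/about
	// https://github.com/s-rah/onionscan
}
```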