Path: blob/master/lib/anemone/cli/url_list.rb
19612 views
require 'anemone'
require 'optparse'
require 'ostruct'

# Command-line script: crawls a site starting at the URL given as the last
# positional argument and prints the URL of every page the crawler reports.
# With -r/--relative only the path component of each page URL is printed.

options = OpenStruct.new
options.relative = false

# Help text shown whenever the command line cannot be understood.
usage = <<-INFO
Usage:
anemone url-list [options] <url>

Synopsis:
Crawls a site starting at the given URL, and outputs the URL of each page
in the domain as they are encountered.

Options:
-r, --relative Output relative URLs (rather than absolute)
INFO

opts = OptionParser.new
opts.on('-r', '--relative') { options.relative = true }

begin
  # Parse (and strip from ARGV) the switches first, so the URL check below
  # only ever sees a positional argument. Checking ARGV.last before parsing
  # would accept a trailing switch such as "-r" as the root URL.
  opts.parse!(ARGV)

  # make sure that the last remaining argument is a URL we can crawl
  root = URI(ARGV.last)
rescue StandardError
  # Reached when the URL is missing or unparseable, and when an unknown
  # switch makes OptionParser raise. Exit status stays 0, as it always has
  # for the usage message, so existing callers see no change.
  puts usage
  exit(0)
end

# NOTE(review): :discard_page_bodies presumably tells Anemone not to retain
# page bodies, which this script never reads -- confirm against Anemone docs.
Anemone.crawl(root, :discard_page_bodies => true) do |anemone|
  anemone.on_every_page do |page|
    puts(options.relative ? page.url.path : page.url)
  end
end