# Path: blob/master/lib/anemone/cli/pagedepth.rb
# 19612 views
# CLI command: `anemone pagedepth <url>`
#
# Crawls a site starting at the URL given as the first command-line argument
# and prints how many crawled pages were found at each depth.
require 'anemone'

begin
  # make sure that the first option is a URL we can crawl
  # (URI() raises when ARGV[0] is missing or cannot be parsed as a URI)
  root = URI(ARGV[0])
rescue
  puts <<-INFO
Usage:
  anemone pagedepth <url>

Synopsis:
  Crawls a site starting at the given URL and outputs a count of
  the number of pages at each depth of the crawl.
INFO
  # NOTE(review): exits with status 0 even though the argument was unusable;
  # kept as-is so existing callers that check the exit status are unaffected.
  exit(0)
end

Anemone.crawl(root) do |anemone|
  # Links matching either of these patterns are not followed.
  anemone.skip_links_like %r{^/c/$}, %r{^/stores/$}

  anemone.after_crawl do |pages|
    # Presumably recomputes each page's depth as its shortest distance from
    # root and then drops duplicate pages -- confirm against the page store
    # implementation. The chained return value is what gets counted below.
    crawled = pages.shortest_paths!(root).uniq!

    # Tally pages per depth; Hash.new(0) lets unseen depths start at zero.
    depth_counts = crawled.values.each_with_object(Hash.new(0)) do |page, counts|
      counts[page.depth] += 1
    end

    # Report in ascending depth order.
    depth_counts.sort.each { |depth, count| puts "Depth: #{depth} Count: #{count}" }
  end
end