require 'nokogiri' class HtmlTree include Enumerable def initialize(html) puts "Input HTML:\n#{html.inspect}" @root = parse_html(html) raise "Parsed HTML tree is empty" if @root.nil? end def each(order = :breadth_first, &block) case order when :breadth_first breadth_first_traversal(&block) when :depth_first depth_first_traversal(&block) else raise ArgumentError, "Unknown order: #{order}" end end def select(order = :breadth_first, &block) results = [] each(order) do |node| results << node if block.call(node) end results end def reduce(accumulator = nil, order = :breadth_first, &block) each(order) do |node| accumulator = block.call(accumulator, node) end accumulator end private def parse_html(html) doc = Nokogiri::HTML::DocumentFragment.parse(html) root_node = doc.at_css('body') || doc.children.find(&:element?) || doc.root return nil if root_node.nil? || root_node.children.empty? build_tree(root_node) end def build_tree(node) if node.element? children = node.children.map { |child| build_tree(child) }.compact HtmlTag.new(node.name, node.attributes.transform_values(&:value), children) elsif node.text? && !node.content.strip.empty? HtmlTag.new("text", { "content" => node.content.strip }, []) end end def breadth_first_traversal queue = [@root].compact until queue.empty? current = queue.shift next if current.nil? yield current queue.concat(current.children.compact) if current.has_children? end end def depth_first_traversal(node = @root, &block) return if node.nil? yield node node.children.compact.each { |child| depth_first_traversal(child, &block) } if node.has_children? end end class HtmlTag attr_reader :name, :attributes, :children def initialize(name, attributes, children = []) @name = name @attributes = attributes @children = children end def has_children? !@children.empty? end def to_s "#{@name} #{@attributes}" end end html = <<-HTML
Hello, world!