require 'nokogiri' class HtmlTree include Enumerable def initialize(html) @root = parse_html(html) raise "Parsed HTML tree is empty" if @root.nil? end def each(order = :breadth_first, &block) return enum_for(:each, order) unless block_given? case order when :breadth_first queue = [@root].compact until queue.empty? current = queue.shift yield current queue.concat(current.children) if current.has_children? end when :depth_first stack = [@root].compact until stack.empty? current = stack.pop yield current stack.concat(current.children.reverse) if current.has_children? end else raise ArgumentError, "Unknown order: #{order}" end end private def parse_html(html) doc = Nokogiri::HTML::DocumentFragment.parse(html) root_node = doc.at_css('body') || doc.children.find(&:element?) || doc.root return nil if root_node.nil? || root_node.children.empty? build_tree(root_node) end def build_tree(node) if node.element? children = node.children.map { |child| build_tree(child) }.compact HtmlTag.new(node.name, node.attributes.transform_values(&:value), children) elsif node.text? && !node.content.strip.empty? HtmlTag.new("text", { "content" => node.content.strip }, []) end end end class HtmlTag attr_reader :name, :attributes, :children def initialize(name, attributes, children = []) @name = name @attributes = attributes @children = children end def has_children? !@children.empty? end def to_s attr_str = attributes.map { |k, v| "#{k}=#{v.inspect}" }.join(", ") "#{@name}(#{attr_str})" end end html = <<-HTML
Hello, world!