Skip to content

Commit 1fd1f92

Browse files
committed
Add LLM-friendly documentation export feature
Implements automatic generation of LLM-friendly text files during Jekyll build:
- /llms/guides.txt: All 96 developer guides
- /llms/api-docs.txt: C, Android, iOS, JavaScript, and Rocky.js API documentation
- /llms/community.txt: Community tools, apps, and libraries

Features:
- Clean markdown format with minimal metadata (title + URL)
- Automatic Liquid tag/variable removal
- Filters out redirect pages and stub content
- Organized with "---" separators between pages
- Only generates files with actual content

Architecture:
- lib/llms_txt_builder.rb: Reusable builder module (DRY principle)
- plugins/generator_llms_txt.rb: Jekyll generator plugin (runs at :lowest priority)

Signed-off-by: Ilia Breitburg <[email protected]>
1 parent 6d2cb15 commit 1fd1f92

File tree

2 files changed

+209
-0
lines changed

2 files changed

+209
-0
lines changed

lib/llms_txt_builder.rb

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
require 'fileutils'
2+
3+
module LlmsTxt
  # Builds LLM-friendly plain-text exports from Jekyll content.
  #
  # Every page or document is rendered as a small markdown section (title,
  # absolute URL, cleaned body); sections are joined with a "---" separator.
  class Builder
    # Marker placed between pages and after a file's section header.
    SEPARATOR = '---'.freeze

    # Text inserted between two formatted pages.
    PAGE_JOINER = "\n\n#{SEPARATOR}\n\n".freeze

    # Pages whose stripped source is shorter than this are treated as stubs.
    MIN_CONTENT_LENGTH = 100

    # Matches every piece of Liquid markup that has to leave the export.
    # Alternatives are ordered so that block tags win over the generic tag
    # pattern; only the `raw` alternative captures text that must be kept.
    LIQUID_MARKUP = /
      \{%-?\s*comment\s*-?%\}.*?\{%-?\s*endcomment\s*-?%\}  # comment block: drop everything
      |
      \{%-?\s*raw\s*-?%\}(?<raw>.*?)\{%-?\s*endraw\s*-?%\}  # raw block: keep the literal body
      |
      \{%.*?%\}                                             # any other tag
      |
      \{\{.*?\}\}                                           # output variable
    /mx.freeze

    # Three or more consecutive newlines (collapsed to a single blank line).
    EXCESS_BLANK_LINES = /\n{3,}/.freeze

    # @param site [#config, #collections, #pages, #source, #static_files]
    #   the Jekyll site being built
    def initialize(site)
      @site = site
      @base_url = site_root(site.config)
    end

    # Process a Jekyll collection and return formatted content.
    #
    # @param collection_name [String] key into `site.collections`
    # @return [String] joined page sections; '' when the collection is
    #   missing or every document is excluded
    def process_collection(collection_name)
      process_collections([collection_name])
    end

    # Process multiple collections and return formatted content.
    #
    # @param collection_names [Array<String>] keys into `site.collections`;
    #   unknown names are skipped
    # @return [String] joined page sections in the order given
    def process_collections(collection_names)
      docs = collection_names.flat_map { |name| @site.collections[name]&.docs || [] }
      join_pages(docs)
    end

    # Process generated pages matching a URL pattern.
    #
    # @param url_pattern [Regexp] tested against each page's `url`
    # @return [String] joined page sections for the matching pages
    def process_pages(url_pattern)
      join_pages(@site.pages.select { |page| page.url.match?(url_pattern) })
    end

    # Format a single page/document.
    #
    # @param page [#data, #url, #content]
    # @return [String] "# title", "URL: ...", a blank line, then the cleaned body
    def format_page(page)
      [
        "# #{extract_title(page)}",
        "URL: #{build_url(page.url)}",
        '',
        extract_content(page)
      ].join("\n")
    end

    # Write content to a file in the llms directory and register it with the
    # site as a static file so it gets copied to the build output.
    #
    # @param filename [String] file name inside the `llms` directory
    # @param content [String] already formatted body
    # @param section_name [String, nil] when given, a header block naming the
    #   section is written before the content
    def write_file(filename, content, section_name = nil)
      # The file is staged in a tmp directory next to the site source.
      tmp_dir = File.join(@site.source, '../tmp/')
      llms_dir = File.join(tmp_dir, 'llms')
      FileUtils.mkdir_p(llms_dir)

      lines = []
      lines.concat(section_header(section_name)) if section_name
      lines << content
      File.write(File.join(llms_dir, filename), lines.join("\n"))

      @site.static_files << Jekyll::StaticFile.new(@site, tmp_dir, 'llms', filename)

      Jekyll.logger.info('LLMS.txt:', "Generated #{filename}")
    end

    private

    # Absolute prefix for page URLs: `url` followed by `baseurl`, each without
    # a trailing slash so that joining with a page URL ("/foo/") never yields
    # a double slash.
    def site_root(config)
      host = config['url'].to_s.chomp('/')
      prefix = config['baseurl'].to_s.chomp('/')
      prefix = "/#{prefix}" unless prefix.empty? || prefix.start_with?('/')
      "#{host}#{prefix}"
    end

    # Drop excluded pages, format the rest and join them with the separator.
    def join_pages(pages)
      pages.reject { |page| should_exclude?(page) }
           .map { |page| format_page(page) }
           .join(PAGE_JOINER)
    end

    # Header lines written at the top of a file that has a section name.
    def section_header(section_name)
      [
        "# #{section_name}",
        '',
        "This file contains all #{section_name.downcase} from the Pebble Developer Documentation.",
        '',
        SEPARATOR,
        ''
      ]
    end

    # Check if a page should be excluded from LLM files.
    def should_exclude?(page)
      # Strict comparison on purpose: only a boolean `true` opts a page out.
      return true if page.data['llms_exclude'] == true

      # Exclude redirect pages (they have no useful content)
      return true if page.data['layout'] == 'redirect'

      # Exclude pages with very little content (likely stubs or redirects)
      (page.content || '').strip.length < MIN_CONTENT_LENGTH
    end

    # Extract the title from a page, falling back to its name and finally to
    # the last URL segment without extension.
    def extract_title(page)
      page.data['title'] || page.data['name'] || File.basename(page.url, '.*')
    end

    # Build the full URL for a page.
    def build_url(path)
      "#{@base_url}#{path}"
    end

    # Extract and clean content from a page.
    #
    # Liquid tags and output variables are removed (including multiline
    # ones). Comment blocks are removed together with their body, while the
    # body of a raw block is kept verbatim because it is literal text.
    def extract_content(page)
      without_liquid = (page.content || '').gsub(LIQUID_MARKUP) { Regexp.last_match(:raw).to_s }

      # Clean up extra whitespace
      without_liquid.gsub(EXCESS_BLANK_LINES, "\n\n").strip
    end
  end
end

plugins/generator_llms_txt.rb

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
require_relative '../lib/llms_txt_builder.rb'
2+
3+
module Jekyll
  # Generator for creating LLM-friendly text files from documentation.
  #
  # Produces up to three files through LlmsTxt::Builder: guides.txt,
  # api-docs.txt and community.txt. A file is only written when there is
  # content for it; otherwise a warning is logged.
  class GeneratorLlmsTxt < Generator
    # Run last so all other generators have completed
    priority :lowest

    # Topic used for this plugin's info/warn log lines.
    LOG_TOPIC = 'LLMS.txt:'.freeze

    # URL prefixes of the generated API documentation pages, in the order
    # their content appears in api-docs.txt (C, Android, iOS, JavaScript,
    # Rocky.js).
    API_URL_PATTERNS = [
      %r{^/docs/c/},
      %r{^/docs/pebblekit-android/},
      %r{^/docs/pebblekit-ios/},
      %r{^/docs/pebblekit-js/},
      %r{^/docs/rockyjs/}
    ].freeze

    # Collections merged into community.txt, in output order.
    COMMUNITY_COLLECTIONS = %w[community_tools community_apps community_libraries].freeze

    # Entry point called with the site being built.
    #
    # Any StandardError raised while generating the files is logged (message
    # and backtrace) and not re-raised, so a failure here is reported without
    # propagating to the caller.
    #
    # @param site the Jekyll site being built
    def generate(site)
      @site = site
      @builder = LlmsTxt::Builder.new(site)

      Jekyll.logger.info(LOG_TOPIC, 'Generating LLM documentation files...')

      begin
        generate_guides
        generate_api_docs
        generate_community

        Jekyll.logger.info(LOG_TOPIC, 'Done.')
      rescue StandardError => e
        Jekyll.logger.error('LLMS.txt Error:', e.message)
        # Array() keeps the handler itself from failing on a nil backtrace.
        Jekyll.logger.error('LLMS.txt Error:', Array(e.backtrace).join("\n"))
      end
    end

    private

    # Generate guides.txt from the guides collection
    def generate_guides
      write_or_warn('guides.txt',
                    @builder.process_collection('guides'),
                    'Guides',
                    'No guides found to process')
    end

    # Generate api-docs.txt from generated API documentation pages
    def generate_api_docs
      # One chunk per URL prefix; prefixes without any usable page are dropped
      # so no empty section ends up between two separators.
      sections = API_URL_PATTERNS
                 .map { |pattern| @builder.process_pages(pattern) }
                 .reject(&:empty?)

      separator = "\n\n#{LlmsTxt::Builder::SEPARATOR}\n\n"
      write_or_warn('api-docs.txt',
                    sections.join(separator),
                    'API Documentation',
                    'No API docs found to process (DOCS_URL not set or no valid content)')
    end

    # Generate community.txt from community collections
    def generate_community
      write_or_warn('community.txt',
                    @builder.process_collections(COMMUNITY_COLLECTIONS),
                    'Community Resources',
                    'No community resources found to process')
    end

    # Write +content+ through the builder, or log +empty_warning+ and skip
    # the file when there is nothing to write.
    def write_or_warn(filename, content, section_name, empty_warning)
      if content.empty?
        Jekyll.logger.warn(LOG_TOPIC, empty_warning)
        return
      end

      @builder.write_file(filename, content, section_name)
    end
  end
end

0 commit comments

Comments
 (0)