11import os
22from urllib .parse import urljoin
33from datetime import datetime
4+ from pathlib import Path
5+
46import argparse
57
6- def parse_summary ():
8+ def parse_summary (summary_file_path ):
79 """Parse URLs from the SUMMARY.md file."""
8- with open ("../../src/SUMMARY.md" , "r" ) as file :
10+ with open (summary_file_path , "r" ) as file :
911 for line in file :
1012 if "](" in line :
1113 url = line .split ("](" )[1 ].split (")" )[0 ]
@@ -23,10 +25,10 @@ def determine_priority(url_path, higher_priority_section):
2325 else :
2426 return "0.5" # All other pages
2527
26- def generate_sitemap (domain , output_path , higher_priority_section ):
28+ def generate_sitemap (domain , output_path , summary_file_path , higher_priority_section ):
2729 """Generate a sitemap XML file from SUMMARY.md structure."""
2830 domain = "https://" + domain
29- urls = parse_summary () # Add base URL to the list of URLs
31+ urls = parse_summary (summary_file_path ) # Add base URL to the list of URLs
3032 urls = ["" ] + list (urls )
3133
3234 sitemap = '<?xml version="1.0" encoding="UTF-8"?>\n '
@@ -48,11 +50,23 @@ def generate_sitemap(domain, output_path, higher_priority_section):
4850 with open (output_path , "w" ) as file :
4951 file .write (sitemap )
5052
# Default input/output locations. Both are anchored to this script's own
# directory (not the current working directory) and resolved to absolute
# paths, so the defaults are the same wherever the script is invoked from.
DEFAULT_SUMMARY_MD_PATH = (Path(__file__).parent / "../component-model/src/SUMMARY.md").resolve()
# NOTE: the file extension must be ".xml"; it was previously misspelled ".sml",
# which made the default run emit a file that is not picked up as the sitemap.
DEFAULT_SITEMAP_XML_PATH = (Path(__file__).parent / "../component-model/book/html/sitemap.xml").resolve()

if __name__ == "__main__":
    # Command-line entry point: parse the options, validate the paths up
    # front, then hand off to generate_sitemap().
    parser = argparse.ArgumentParser(description="Generate a sitemap for mdBook")
    parser.add_argument("-d", "--domain", required=True, help="Domain for the mdBook site (e.g., 'component-model.bytecodealliance.org')")
    parser.add_argument("-o", "--output-path", default=DEFAULT_SITEMAP_XML_PATH, help="Output path for the sitemap file")
    parser.add_argument("-s", "--summary-md-path", default=DEFAULT_SUMMARY_MD_PATH, help="Path to SUMMARY.md")
    parser.add_argument("-p", "--higher-priority", help="Subsection path (e.g., 'design') to assign a higher priority of 0.8")
    args = parser.parse_args()

    # Fail early with a clear message if the summary file is missing, rather
    # than part-way through sitemap generation.
    summary_file_path = Path(args.summary_md_path).resolve()
    if not summary_file_path.exists():
        raise FileNotFoundError(f"failed to find summary file [{summary_file_path}]")

    # Only the parent directory has to exist; the sitemap file itself is
    # created (or overwritten) by generate_sitemap().
    output_path = Path(args.output_path).resolve()
    if not output_path.parent.exists():
        raise FileNotFoundError(f"failed to find output dir [{output_path.parent}]")

    generate_sitemap(args.domain, output_path, summary_file_path, args.higher_priority)
0 commit comments