From 74bf8a32d6b5f5ed0115c9f5a5e827ee7ae5e179 Mon Sep 17 00:00:00 2001 From: Lucy Date: Tue, 30 Sep 2025 23:56:51 +0200 Subject: [PATCH 01/10] meow --- Cargo.lock | 134 +++++++++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 7 +++ 2 files changed, 141 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 160f51f..5e59b37 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -134,6 +134,26 @@ version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" +[[package]] +name = "bincode" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740" +dependencies = [ + "bincode_derive", + "serde", + "unty", +] + +[[package]] +name = "bincode_derive" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09" +dependencies = [ + "virtue", +] + [[package]] name = "bitflags" version = "1.3.2" @@ -204,6 +224,12 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" +[[package]] +name = "cfg_aliases" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" + [[package]] name = "clap" version = "4.5.48" @@ -311,6 +337,21 @@ dependencies = [ "libc", ] +[[package]] +name = "crc" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675" +dependencies = [ + "crc-catalog", +] + +[[package]] +name = "crc-catalog" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5" + [[package]] name = "crossbeam-channel" version = "0.5.15" @@ -494,6 +535,18 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "dialoguer" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25f104b501bf2364e78d0d3974cbc774f738f5865306ed128e1e0d7499c0ad96" +dependencies = [ + "console", + "shell-words", + "tempfile", + "zeroize", +] + [[package]] name = "digest" version = "0.10.7" @@ -768,6 +821,19 @@ version = "0.32.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e629b9b98ef3dd8afe6ca2bd0f89306cec16d43d907889945bc5d6687f2f13c7" +[[package]] +name = "gptman" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb0a854011d1d26666b8d7aee7542ad66d504d1fab58b3728fb630f8bde53b57" +dependencies = [ + "bincode", + "crc", + "nix", + "serde", + "thiserror 2.0.17", +] + [[package]] name = "h2" version = "0.4.12" @@ -822,6 +888,12 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "html5ever" version = "0.27.0" @@ -1368,6 +1440,18 @@ version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" +[[package]] +name = "nix" +version = "0.30.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6" +dependencies = [ + "bitflags 2.9.4", + "cfg-if", + "cfg_aliases", + "libc", +] + [[package]] name = "nu-ansi-term" version = "0.50.1" @@ -1466,6 +1550,9 @@ dependencies = [ "clap", "console", "crossterm 0.29.0", + "dialoguer", + "gptman", + "hex", "html_parser", "indicatif", "inquire", @@ -1479,11 +1566,15 @@ dependencies = [ "semver", "serde", "serde_json", + "shell-words", "spinners", + "tokio", "tracing", "tracing-appender", "tracing-subscriber", "tui", + "url", + "uuid", ] [[package]] @@ -2147,6 +2238,12 @@ dependencies = [ "lazy_static", ] +[[package]] +name = "shell-words" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde" + [[package]] name = "shlex" version = "1.3.0" @@ -2510,12 +2607,26 @@ dependencies = [ "io-uring", "libc", "mio 1.0.4", + "parking_lot", "pin-project-lite", + "signal-hook-registry", "slab", "socket2", + "tokio-macros", "windows-sys 0.59.0", ] +[[package]] +name = "tokio-macros" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "tokio-native-tls" version = "0.3.1" @@ -2745,6 +2856,12 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "unty" +version = "0.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae" + [[package]] name = "url" version = "2.5.7" @@ -2775,6 +2892,17 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" +[[package]] +name = "uuid" +version = "1.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" +dependencies = [ + "getrandom 0.3.3", + "js-sys", + "wasm-bindgen", +] + [[package]] name = "valuable" version = "0.1.1" @@ -2793,6 +2921,12 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "virtue" +version = "0.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1" + [[package]] name = "want" version = "0.3.1" diff --git a/Cargo.toml b/Cargo.toml index 51cbdc4..43e0f04 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -38,6 +38,13 @@ tui = "0.19.0" tracing = "0.1.41" tracing-appender = "0.2.3" tracing-subscriber = { version = "0.3.20", features = ["env-filter", "fmt"] } +gptman = "2.0.1" +dialoguer = "0.12.0" +tokio = { version = "1.47.1", features = ["full"] } +shell-words = "1.1.0" +url = "2.5.7" +uuid = { version = "1.18.1", features = ["v4"] } +hex = "0.4.3" [features] # TUI feature flag From 3ce470e019410fc1c4617c24f088647f881c4eca Mon Sep 17 00:00:00 2001 From: m00d Date: Wed, 1 Oct 2025 06:58:04 +0200 Subject: [PATCH 02/10] 
Integrate metadata documentation and jhalfs manifests --- Cargo.lock | 809 +++++++++---- Cargo.toml | 12 +- README.md | 37 +- ai/bugs.json | 29 + ai/metadata/cache/mlfs-md5sums.txt | 97 ++ ai/metadata/cache/mlfs-wget-list.txt | 97 ++ ai/metadata/index.json | 16 + .../packages/mlfs/binutils-pass-1.json | 113 ++ ai/metadata/packages/mlfs/binutils-pass1.json | 147 +++ ai/metadata/schema.json | 377 ++++++ ai/notes.md | 15 + ai/personas.json | 24 + ai/tasks.json | 56 + data/mlfs_ml-12.4-40-multilib.json | 1019 +++++++++++++++++ docs/ARCHITECTURE.md | 117 ++ docs/METADATA_PIPELINE.md | 83 ++ src/ai/mod.rs | 79 ++ src/bin/metadata_indexer.rs | 1017 ++++++++++++++++ src/db/mod.rs | 107 ++ src/db/models.rs | 104 ++ src/db/schema.rs | 19 + src/ingest/blfs.rs | 113 ++ src/ingest/glfs.rs | 109 ++ src/ingest/lfs.rs | 169 +++ src/ingest/mod.rs | 67 ++ src/lib.rs | 10 + src/main.rs | 427 ++++++- src/pkgs/mlfs.rs | 116 ++ src/pkgs/mod.rs | 3 + src/pkgs/package.rs | 74 ++ src/pkgs/scaffolder.rs | 293 +++++ src/tui/disk_manager.rs | 20 +- src/tui/main_menu.rs | 5 +- src/tui/settings.rs | 4 +- 34 files changed, 5544 insertions(+), 240 deletions(-) create mode 100644 ai/bugs.json create mode 100644 ai/metadata/cache/mlfs-md5sums.txt create mode 100644 ai/metadata/cache/mlfs-wget-list.txt create mode 100644 ai/metadata/index.json create mode 100644 ai/metadata/packages/mlfs/binutils-pass-1.json create mode 100644 ai/metadata/packages/mlfs/binutils-pass1.json create mode 100644 ai/metadata/schema.json create mode 100644 ai/notes.md create mode 100644 ai/personas.json create mode 100644 ai/tasks.json create mode 100644 data/mlfs_ml-12.4-40-multilib.json create mode 100644 docs/ARCHITECTURE.md create mode 100644 docs/METADATA_PIPELINE.md create mode 100644 src/ai/mod.rs create mode 100644 src/bin/metadata_indexer.rs create mode 100644 src/db/mod.rs create mode 100644 src/db/models.rs create mode 100644 src/db/schema.rs create mode 100644 src/ingest/blfs.rs create mode 100644 src/ingest/glfs.rs create mode 100644 src/ingest/lfs.rs create mode 100644 src/ingest/mod.rs create mode 100644 src/pkgs/mlfs.rs create mode 100644 src/pkgs/package.rs create mode 100644 src/pkgs/scaffolder.rs diff --git a/Cargo.lock b/Cargo.lock index 5e59b37..5331705 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,6 +26,7 @@ dependencies = [ "cfg-if", "getrandom 0.3.3", "once_cell", + "serde", "version_check", "zerocopy", ] @@ -40,10 +41,13 @@ dependencies = [ ] [[package]] -name = "allocator-api2" -version = "0.2.21" +name = "android_system_properties" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] [[package]] name = "anstream" @@ -128,6 +132,12 @@ dependencies = [ "windows-link 0.2.0", ] +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + [[package]] name = "base64" version = "0.22.1" @@ -154,6 +164,21 @@ dependencies = [ "virtue", ] +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bitflags" version = "1.3.2" @@ -181,6 +206,12 @@ version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +[[package]] +name = "bytecount" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" + [[package]] name = "byteorder" version = "1.5.0" @@ -199,15 +230,6 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df8670b8c7b9dae1793364eafadf7239c40d669904660c5960d74cfd80b46a53" -[[package]] -name = "castaway" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a" -dependencies = [ - "rustversion", -] - [[package]] name = "cc" version = "1.2.39" @@ -230,6 +252,17 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chrono" +version = "0.4.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +dependencies = [ + "iana-time-zone", + "num-traits", + "windows-link 0.2.0", +] + [[package]] name = "clap" version = "4.5.48" @@ -276,20 +309,6 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" -[[package]] -name = "compact_str" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b79c4069c6cad78e2e0cdfcbd26275770669fb39fd308a752dc110e83b9af32" -dependencies = [ - "castaway", - "cfg-if", - "itoa", - "rustversion", - "ryu", - "static_assertions", -] - [[package]] name = "console" version = "0.16.1" @@ -383,22 +402,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "crossterm" -version = "0.28.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" -dependencies = [ - "bitflags 2.9.4", - "crossterm_winapi", - "mio 1.0.4", - "parking_lot", - "rustix 0.38.44", - "signal-hook", - "signal-hook-mio", - "winapi", -] - [[package]] name = "crossterm" version = "0.29.0" @@ -411,7 +414,7 @@ dependencies = [ "document-features", "mio 1.0.4", "parking_lot", - "rustix 1.1.2", + "rustix", "signal-hook", "signal-hook-mio", "winapi", @@ -461,9 +464,9 @@ dependencies = [ [[package]] name = "darling" -version = "0.20.11" +version = "0.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" dependencies = [ "darling_core", "darling_macro", @@ -471,9 +474,9 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.20.11" +version = "0.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" dependencies = [ "fnv", "ident_case", @@ -485,9 +488,9 @@ dependencies = [ [[package]] name = "darling_macro" -version = "0.20.11" +version = "0.21.3" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ "darling_core", "quote", @@ -547,6 +550,42 @@ dependencies = [ "zeroize", ] +[[package]] +name = "diesel" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8496eeb328dce26ee9d9b73275d396d9bddb433fa30106cf6056dd8c3c2764c" +dependencies = [ + "diesel_derives", + "downcast-rs", + "libsqlite3-sys", + "r2d2", + "sqlite-wasm-rs", + "time", +] + +[[package]] +name = "diesel_derives" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09af0e983035368439f1383011cd87c46f41da81d0f21dc3727e2857d5a43c8e" +dependencies = [ + "diesel_table_macro_syntax", + "dsl_auto_type", + "proc-macro2", + "quote", + "syn 2.0.106", +] + +[[package]] +name = "diesel_table_macro_syntax" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe2444076b48641147115697648dc743c2c00b61adade0f01ce67133c7babe8c" +dependencies = [ + "syn 2.0.106", +] + [[package]] name = "digest" version = "0.10.7" @@ -583,6 +622,26 @@ dependencies = [ "litrs", ] +[[package]] +name = "downcast-rs" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "117240f60069e65410b3ae1bb213295bd828f707b5bec6596a1afc8793ce0cbc" + +[[package]] +name = "dsl_auto_type" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd122633e4bef06db27737f21d3738fb89c8f6d5360d6d9d7635dda142a7757e" +dependencies = [ + "darling", + "either", + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "dtoa" version = "1.0.10" @@ -647,6 +706,16 @@ dependencies = [ "windows-sys 0.61.1", ] +[[package]] +name = "fancy-regex" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b95f7c0680e4142284cf8b22c14a476e87d61b004a3a0861872b32ef7ead40a2" +dependencies = [ + "bit-set", + "regex", +] + [[package]] name = "fastrand" version = "2.3.0" @@ -665,12 +734,6 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" -[[package]] -name = "foldhash" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" - [[package]] name = "foreign-types" version = "0.3.2" @@ -695,6 +758,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fraction" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3027ae1df8d41b4bed2241c8fdad4acc1e7af60c8e17743534b545e77182d678" +dependencies = [ + "lazy_static", + "num", +] + [[package]] name = "futf" version = "0.1.5" @@ -799,8 +872,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi 0.11.1+wasi-snapshot-preview1", + "wasm-bindgen", ] [[package]] @@ -834,6 +909,25 @@ dependencies = [ "thiserror 2.0.17", ] +[[package]] +name = "h2" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" +dependencies = [ + 
"bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "h2" version = "0.4.12" @@ -845,7 +939,7 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http", + "http 1.3.1", "indexmap", "slab", "tokio", @@ -853,17 +947,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "hashbrown" -version = "0.15.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" -dependencies = [ - "allocator-api2", - "equivalent", - "foldhash", -] - [[package]] name = "hashbrown" version = "0.16.0" @@ -923,6 +1006,17 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] name = "http" version = "1.3.1" @@ -934,6 +1028,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + [[package]] name = "http-body" version = "1.0.1" @@ -941,7 +1046,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http", + "http 1.3.1", ] [[package]] @@ -952,8 +1057,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http", - "http-body", + "http 1.3.1", + "http-body 1.0.1", "pin-project-lite", ] @@ -963,6 +1068,36 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + [[package]] name = "hyper" version = "1.7.0" @@ -973,9 +1108,9 @@ dependencies = [ "bytes", "futures-channel", "futures-core", - "h2", - "http", - "http-body", + "h2 0.4.12", + "http 1.3.1", + "http-body 1.0.1", "httparse", "itoa", "pin-project-lite", @@ -991,8 +1126,8 @@ version = "0.27.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ - "http", - "hyper", + "http 1.3.1", + "hyper 1.7.0", "hyper-util", "rustls", "rustls-pki-types", @@ -1009,7 +1144,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", "http-body-util", - "hyper", + "hyper 1.7.0", "hyper-util", "native-tls", "tokio", @@ -1023,26 +1158,50 @@ version = "0.1.17" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "3c6995591a8f1380fcb4ba966a252a4b29188d51d2b89e3a252f5305be65aea8" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "futures-channel", "futures-core", "futures-util", - "http", - "http-body", - "hyper", + "http 1.3.1", + "http-body 1.0.1", + "hyper 1.7.0", "ipnet", "libc", "percent-encoding", "pin-project-lite", - "socket2", - "system-configuration", + "socket2 0.6.0", + "system-configuration 0.6.1", "tokio", "tower-service", "tracing", "windows-registry", ] +[[package]] +name = "iana-time-zone" +version = "0.1.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "icu_collections" version = "2.0.0" @@ -1163,7 +1322,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b0f83760fb341a774ed326568e19f5a863af4a952def8c39f9ab92fd95b88e5" dependencies = [ "equivalent", - "hashbrown 0.16.0", + "hashbrown", ] [[package]] @@ -1179,12 +1338,6 @@ dependencies = [ "web-time", ] -[[package]] -name = "indoc" -version = "2.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" - [[package]] name = "inquire" version = "0.9.1" @@ -1199,19 +1352,6 @@ dependencies = [ "unicode-width 0.2.0", ] -[[package]] -name = "instability" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "435d80800b936787d62688c927b6490e887c7ef5ff9ce922c6c6050fca75eb9a" -dependencies = [ - "darling", - "indoc", - "proc-macro2", - "quote", - "syn 2.0.106", -] - [[package]] name = "io-uring" version = "0.7.10" @@ -1246,12 +1386,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] -name = "itertools" -version = "0.13.0" +name = "iso8601" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +checksum = "e1082f0c48f143442a1ac6122f67e360ceee130b967af4d50996e5154a45df46" dependencies = [ - "either", + "nom", ] [[package]] @@ -1270,6 +1410,36 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "jsonschema" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a071f4f7efc9a9118dfb627a0a94ef247986e1ab8606a4c806ae2b3aa3b6978" +dependencies = [ + "ahash", + "anyhow", + "base64 0.21.7", + "bytecount", + "clap", + "fancy-regex", + "fraction", + "getrandom 0.2.16", + "iso8601", + "itoa", + "memchr", + "num-cmp", + "once_cell", + "parking_lot", + "percent-encoding", + "regex", + "reqwest 0.11.27", + "serde", + "serde_json", + "time", + "url", + "uuid", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -1283,10 +1453,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" [[package]] -name = "linux-raw-sys" -version = "0.4.15" +name = 
"libsqlite3-sys" +version = "0.35.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" +checksum = "133c182a6a2c87864fe97778797e46c7e999672690dc9fa3ee8e241aa4a9c13f" +dependencies = [ + "pkg-config", + "vcpkg", +] [[package]] name = "linux-raw-sys" @@ -1322,15 +1496,6 @@ version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" -[[package]] -name = "lru" -version = "0.12.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" -dependencies = [ - "hashbrown 0.15.5", -] - [[package]] name = "mac" version = "0.1.1" @@ -1452,6 +1617,15 @@ dependencies = [ "libc", ] +[[package]] +name = "nom" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" +dependencies = [ + "memchr", +] + [[package]] name = "nu-ansi-term" version = "0.50.1" @@ -1461,12 +1635,91 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-cmp" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63335b2e2c34fae2fb0aa2cecfd9f0832a1e24b3b32ecec612c3426d46dc8aaa" + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + [[package]] name = "num-conv" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "num_cpus" version = "1.17.0" @@ -1547,25 +1800,28 @@ name = "package_management" version = "0.1.0" dependencies = [ "anyhow", + 
"chrono", "clap", "console", "crossterm 0.29.0", "dialoguer", + "diesel", "gptman", "hex", "html_parser", "indicatif", "inquire", + "jsonschema", "md5", "num_cpus", "rand 0.9.2", - "ratatui", "regex", - "reqwest", + "reqwest 0.12.23", "scraper", "semver", "serde", "serde_json", + "sha2", "shell-words", "spinners", "tokio", @@ -1575,6 +1831,7 @@ dependencies = [ "tui", "url", "uuid", + "walkdir", ] [[package]] @@ -1600,12 +1857,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "paste" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" - [[package]] name = "percent-encoding" version = "2.3.2" @@ -1824,6 +2075,17 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "r2d2" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51de85fb3fb6524929c8a2eb85e6b6d363de4e8c48f9e2c2eac4944abc181c93" +dependencies = [ + "log", + "parking_lot", + "scheduled-thread-pool", +] + [[package]] name = "rand" version = "0.8.5" @@ -1883,27 +2145,6 @@ dependencies = [ "getrandom 0.3.3", ] -[[package]] -name = "ratatui" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabd94c2f37801c20583fc49dd5cd6b0ba68c716787c2dd6ed18571e1e63117b" -dependencies = [ - "bitflags 2.9.4", - "cassowary", - "compact_str", - "crossterm 0.28.1", - "indoc", - "instability", - "itertools", - "lru", - "paste", - "strum 0.26.3", - "unicode-segmentation", - "unicode-truncate", - "unicode-width 0.2.0", -] - [[package]] name = "redox_syscall" version = "0.5.17" @@ -1942,23 +2183,59 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" +[[package]] +name = "reqwest" +version = "0.11.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" +dependencies = [ + "base64 0.21.7", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "hyper 0.14.32", + "ipnet", + "js-sys", + "log", + "mime", + "once_cell", + "percent-encoding", + "pin-project-lite", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper 0.1.2", + "system-configuration 0.5.1", + "tokio", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "winreg", +] + [[package]] name = "reqwest" version = "0.12.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d429f34c8092b2d42c7c93cec323bb4adeb7c67698f70839adec842ec10c7ceb" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "encoding_rs", "futures-channel", "futures-core", "futures-util", - "h2", - "http", - "http-body", + "h2 0.4.12", + "http 1.3.1", + "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.7.0", "hyper-rustls", "hyper-tls", "hyper-util", @@ -1972,7 +2249,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", - "sync_wrapper", + "sync_wrapper 1.0.2", "tokio", "tokio-native-tls", "tower", @@ -2004,19 +2281,6 @@ version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" -[[package]] -name = "rustix" -version = "0.38.44" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" -dependencies = [ - "bitflags 2.9.4", - "errno", - "libc", - "linux-raw-sys 0.4.15", - "windows-sys 0.59.0", -] - [[package]] name = "rustix" version = "1.1.2" @@ -2026,7 +2290,7 @@ dependencies = [ "bitflags 2.9.4", "errno", "libc", - "linux-raw-sys 0.11.0", + "linux-raw-sys", "windows-sys 0.61.1", ] @@ -2075,6 +2339,15 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.28" @@ -2084,6 +2357,15 @@ dependencies = [ "windows-sys 0.61.1", ] +[[package]] +name = "scheduled-thread-pool" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3cbc66816425a074528352f5789333ecff06ca41b36b0b0efdfbb29edc391a19" +dependencies = [ + "parking_lot", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -2305,6 +2587,16 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "socket2" version = "0.6.0" @@ -2323,7 +2615,22 @@ checksum = "a0ef947f358b9c238923f764c72a4a9d42f2d637c46e059dbd319d6e7cfb4f82" dependencies = [ "lazy_static", "maplit", - "strum 0.24.1", + "strum", +] + +[[package]] +name = "sqlite-wasm-rs" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aead1c279716985b981b7940ef9b652d3f93d70a7296853c633b7ce8fa8088a" +dependencies = [ + "js-sys", + "once_cell", + "thiserror 2.0.17", + "tokio", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", ] [[package]] @@ -2332,12 +2639,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - [[package]] name = "string_cache" version = "0.8.9" @@ -2375,16 +2676,7 @@ version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" dependencies = [ - "strum_macros 0.24.3", -] - -[[package]] -name = "strum" -version = "0.26.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" -dependencies = [ - "strum_macros 0.26.4", + "strum_macros", ] [[package]] @@ -2400,19 +2692,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "strum_macros" -version = "0.26.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" -dependencies = [ - "heck 0.5.0", - "proc-macro2", - 
"quote", - "rustversion", - "syn 2.0.106", -] - [[package]] name = "subtle" version = "2.6.1" @@ -2441,6 +2720,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sync_wrapper" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" + [[package]] name = "sync_wrapper" version = "1.0.2" @@ -2461,6 +2746,17 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "system-configuration" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "system-configuration-sys 0.5.0", +] + [[package]] name = "system-configuration" version = "0.6.1" @@ -2469,7 +2765,17 @@ checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ "bitflags 2.9.4", "core-foundation", - "system-configuration-sys", + "system-configuration-sys 0.6.0", +] + +[[package]] +name = "system-configuration-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +dependencies = [ + "core-foundation-sys", + "libc", ] [[package]] @@ -2491,7 +2797,7 @@ dependencies = [ "fastrand", "getrandom 0.3.3", "once_cell", - "rustix 1.1.2", + "rustix", "windows-sys 0.61.1", ] @@ -2611,7 +2917,7 @@ dependencies = [ "pin-project-lite", "signal-hook-registry", "slab", - "socket2", + "socket2 0.6.0", "tokio-macros", "windows-sys 0.59.0", ] @@ -2669,7 +2975,7 @@ dependencies = [ "futures-core", "futures-util", "pin-project-lite", - "sync_wrapper", + "sync_wrapper 1.0.2", "tokio", "tower-layer", "tower-service", @@ -2684,8 +2990,8 @@ dependencies = [ "bitflags 2.9.4", "bytes", "futures-util", - "http", - "http-body", + "http 1.3.1", + "http-body 1.0.1", "iri-string", "pin-project-lite", "tower", @@ -2821,17 +3127,6 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" -[[package]] -name = "unicode-truncate" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3644627a5af5fa321c95b9b235a72fd24cd29c648c2c379431e6628655627bf" -dependencies = [ - "itertools", - "unicode-segmentation", - "unicode-width 0.1.14", -] - [[package]] name = "unicode-width" version = "0.1.14" @@ -2927,6 +3222,16 @@ version = "0.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -3068,12 +3373,56 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.1", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-core" +version = "0.62.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6844ee5416b285084d3d3fffd743b925a6c9385455f64f6d4fa3031c4c2749a9" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link 0.2.0", + "windows-result 0.4.0", + "windows-strings 0.5.0", +] + +[[package]] +name = "windows-implement" +version = "0.60.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edb307e42a74fb6de9bf3a02d9712678b22399c87e6fa869d6dfcd8c1b7754e0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + +[[package]] +name = "windows-interface" +version = "0.59.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0abd1ddbc6964ac14db11c7213d6532ef34bd9aa042c2e5935f59d7908b46a5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "windows-link" version = "0.1.3" @@ -3093,8 +3442,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e" dependencies = [ "windows-link 0.1.3", - "windows-result", - "windows-strings", + "windows-result 0.3.4", + "windows-strings 0.4.2", ] [[package]] @@ -3106,6 +3455,15 @@ dependencies = [ "windows-link 0.1.3", ] +[[package]] +name = "windows-result" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7084dcc306f89883455a206237404d3eaf961e5bd7e0f312f7c91f57eb44167f" +dependencies = [ + "windows-link 0.2.0", +] + [[package]] name = "windows-strings" version = "0.4.2" @@ -3115,6 +3473,15 @@ dependencies = [ "windows-link 0.1.3", ] +[[package]] +name = "windows-strings" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7218c655a553b0bed4426cf54b20d7ba363ef543b52d515b3e48d7fd55318dda" +dependencies = [ + "windows-link 0.2.0", +] + [[package]] name = "windows-sys" version = "0.48.0" @@ -3346,6 +3713,16 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" +[[package]] +name = "winreg" +version = "0.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + [[package]] name = "wit-bindgen" version = "0.46.0" diff --git a/Cargo.toml b/Cargo.toml index 43e0f04..a6bc3b5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ console = "0.16.1" # Optional Terminal UI crossterm = { version = "0.29.0", optional = true } -ratatui = { version = "0.29.0", optional = true } +tui = { version = "0.19.0", optional = true } # Parsing & scraping html_parser = "0.7.0" @@ -22,6 +22,10 @@ scraper = "0.19.0" regex = "1.11.3" serde = { version = "1.0.228", features = ["derive"] } serde_json = "1.0.145" +jsonschema = "0.17.0" +walkdir = "2.5.0" +chrono = { version = "0.4.38", default-features = false, features = ["clock"] } +sha2 = "0.10.8" # Utilities indicatif = "0.18.0" @@ -34,7 +38,6 @@ md5 = "0.8.0" reqwest = { version = "0.12.23", features = ["blocking", "json"] } semver = "1.0.27" inquire = "0.9.1" -tui = "0.19.0" tracing = "0.1.41" tracing-appender = "0.2.3" tracing-subscriber = { version = "0.3.20", features = ["env-filter", "fmt"] } 
@@ -45,14 +48,15 @@ shell-words = "1.1.0" url = "2.5.7" uuid = { version = "1.18.1", features = ["v4"] } hex = "0.4.3" +diesel = { version = "2.1.6", features = ["sqlite", "r2d2", "returning_clauses_for_sqlite_3_35"] } + [features] # TUI feature flag -tui = ["ratatui", "crossterm"] +tui = ["dep:tui", "dep:crossterm"] # Optional default features default = [] -crossterm = ["dep:crossterm"] # ----------------------- # Cargo-make tasks diff --git a/README.md b/README.md index 75fa96c..83de895 100644 --- a/README.md +++ b/README.md @@ -94,10 +94,43 @@ You can also run the project directly in the flake shell: nix run ``` +### AI metadata tooling + +The AI metadata store under `ai/metadata/` comes with a helper CLI to +validate package records against the JSON schema and regenerate +`index.json` after adding new entries: + +```bash +cargo run --bin metadata_indexer -- --base-dir . validate +cargo run --bin metadata_indexer -- --base-dir . index +``` + +Use `--compact` with `index` if you prefer single-line JSON output. + +To draft metadata for a specific book page, you can run the harvest mode. +It fetches the XHTML, scrapes the build commands, and emits a schema- +compliant JSON skeleton (pass `--dry-run` to inspect the result without +writing to disk): + +```bash +cargo run --bin metadata_indexer -- \ + --base-dir . harvest \ + --book mlfs \ + --page chapter05/binutils-pass1 \ + --dry-run +``` + +## πŸ“š Documentation + +- [Architecture Overview](docs/ARCHITECTURE.md) – high-level tour of the crate + layout, binaries, and supporting modules. +- [Metadata Harvesting Pipeline](docs/METADATA_PIPELINE.md) – how the metadata + indexer produces and validates the JSON records under `ai/metadata/`. +- `ai/notes.md` – scratchpad for ongoing research tasks (e.g., deeper jhalfs + integration). + --- ## πŸ“„ License LPKG is licensed under the [MIT License](LICENSE). 
- - diff --git a/ai/bugs.json b/ai/bugs.json new file mode 100644 index 0000000..c5c770a --- /dev/null +++ b/ai/bugs.json @@ -0,0 +1,29 @@ +[ + { + "id": "disk-manager-private-fields", + "title": "Disk manager accesses private GPT fields", + "description": "src/tui/disk_manager.rs reaches into gptman::GPT::partitions which is private, breaking compilation.", + "status": "open", + "owner": "default_cli", + "created_at": "2025-03-09T00:00:00Z", + "labels": ["tui", "blocking-build"] + }, + { + "id": "tui-feature-build", + "title": "TUI modules require crossterm feature gating", + "description": "When the crate is built without the `tui` feature the main binary still imports crossterm types and fails to compile.", + "status": "open", + "owner": "default_cli", + "created_at": "2025-03-09T00:00:00Z", + "labels": ["tui", "feature-gate"] + }, + { + "id": "metadata-harvest-no-source-urls", + "title": "Harvested metadata missing source URLs", + "description": "`metadata_indexer harvest --book mlfs --page chapter05/binutils-pass1 --dry-run` emits a draft record with no source URLs even after wget-list fallback; Binutils tarball discovery logic needs to inspect package download tables or improve slug matching.", + "status": "open", + "owner": "default_cli", + "created_at": "2025-10-01T04:40:00Z", + "labels": ["metadata", "ingest", "mlfs"] + } +] diff --git a/ai/metadata/cache/mlfs-md5sums.txt b/ai/metadata/cache/mlfs-md5sums.txt new file mode 100644 index 0000000..44d79f3 --- /dev/null +++ b/ai/metadata/cache/mlfs-md5sums.txt @@ -0,0 +1,97 @@ +590765dee95907dbc3c856f7255bd669 acl-2.3.2.tar.xz +227043ec2f6ca03c0948df5517f9c927 attr-2.5.2.tar.gz +1be79f7106ab6767f18391c5e22be701 autoconf-2.72.tar.xz +cea31dbf1120f890cbf2a3032cfb9a68 automake-1.18.1.tar.xz +977c8c0c5ae6309191e7768e28ebc951 bash-5.3.tar.gz +ad4db5a0eb4fdbb3f6813be4b6b3da74 bc-7.0.3.tar.xz +dee5b4267e0305a99a3c9d6131f45759 binutils-2.45.tar.xz +c28f119f405a2304ff0a7ccdcc629713 bison-3.8.2.tar.xz +67e051268d0c475ea773822f7500d0e5 bzip2-1.0.8.tar.gz +b2e687b6e664b9dd76581836c5c3e782 coreutils-9.8.tar.xz +68c5208c58236eba447d7d6d1326b821 dejagnu-1.6.3.tar.gz +d1b18b20868fb561f77861cd90b05de4 diffutils-3.12.tar.xz +113d7a7ee0710d2a670a44692a35fd2e e2fsprogs-1.47.3.tar.gz +ceefa052ded950a4c523688799193a44 elfutils-0.193.tar.bz2 +423975a2a775ff32f12c53635b463a91 expat-2.7.3.tar.xz +00fce8de158422f5ccd2666512329bd2 expect5.45.4.tar.gz +459da2d4b534801e2e2861611d823864 file-5.46.tar.gz +870cfd71c07d37ebe56f9f4aaf4ad872 findutils-4.10.0.tar.xz +2882e3179748cc9f9c23ec593d6adc8d flex-2.6.4.tar.gz +c538415c1f27bd69cbbbf3cdd5135d39 flit_core-3.12.0.tar.gz +b7014650c5f45e5d4837c31209dc0037 gawk-5.3.2.tar.xz +b861b092bf1af683c46a8aa2e689a6fd gcc-15.2.0.tar.xz +aaa600665bc89e2febb3c7bd90679115 gdbm-1.26.tar.gz +8e14e926f088e292f5f2bce95b81d10e gettext-0.26.tar.xz +23c6f5a27932b435cae94e087cb8b1f5 glibc-2.42.tar.xz +956dc04e864001a9c22429f761f2c283 gmp-6.3.0.tar.xz +31753b021ea78a21f154bf9eecb8b079 gperf-3.3.tar.gz +5d9301ed9d209c4a88c8d3a6fd08b9ac grep-3.12.tar.xz +5e4f40315a22bb8a158748e7d5094c7d groff-1.23.0.tar.gz +60c564b1bdc39d8e43b3aab4bc0fb140 grub-2.12.tar.xz +4bf5a10f287501ee8e8ebe00ef62b2c2 gzip-1.14.tar.xz +437a3e9f4a420244c90db4ab20e713b6 iana-etc-20250926.tar.gz +401d7d07682a193960bcdecafd03de94 inetutils-2.6.tar.xz +12e517cac2b57a0121cda351570f1e63 intltool-0.51.0.tar.gz +80e1f91bf59d572acc15d5c6eb4f3e7c iproute2-6.16.0.tar.xz +11ee9d335b227ea2e8579c4ba6e56138 isl-0.27.tar.xz +66d4c25ff43d1deaf9637ccda523dec8 jinja2-3.1.6.tar.gz 
+7be7c6f658f5fb9512e2c490349a8eeb kbd-2.9.0.tar.xz +36f2cc483745e81ede3406fa55e1065a kmod-34.2.tar.xz +0386dc14f6a081a94dfb4c2413864eed less-679.tar.gz +2be34eced7c861fea8894e7195dac636 lfs-bootscripts-20250827.tar.xz +449ade7d620b5c4eeb15a632fbaa4f74 libcap-2.76.tar.xz +92af9efad4ba398995abf44835c5d9e9 libffi-3.5.2.tar.gz +17ac6969b2015386bcb5d278a08a40b5 libpipeline-1.5.8.tar.gz +22e0a29df8af5fdde276ea3a7d351d30 libtool-2.5.4.tar.xz +1796a5d20098e9dd9e3f576803c83000 libxcrypt-4.4.38.tar.xz +feb0a3d5ecf5a4628aed7d9f8f7ab3f6 linux-6.16.9.tar.xz +dead9f5f1966d9ae56e1e32761e4e675 lz4-1.10.0.tar.gz +6eb2ebed5b24e74b6e890919331d2132 m4-1.4.20.tar.xz +c8469a3713cbbe04d955d4ae4be23eeb make-4.4.1.tar.gz +b6335533cbeac3b24cd7be31fdee8c83 man-db-2.13.1.tar.xz +16f68d70139dd2bbcae4102be4705753 man-pages-6.15.tar.xz +13a73126d25afa72a1ff0daed072f5fe markupsafe-3.0.3.tar.gz +19e0a1091cec23d369dd77d852844195 meson-1.9.1.tar.gz +5c9bc658c9fd0f940e8e3e0f09530c62 mpc-1.3.1.tar.gz +7c32c39b8b6e3ae85f25156228156061 mpfr-4.2.2.tar.xz +679987405412f970561cc85e1e6428a2 ncurses-6.5-20250809.tgz +c35f8f55f4cf60f1a916068d8f45a0f8 ninja-1.13.1.tar.gz +0ec20faeb96bbb203c8684cc7fe4432e openssl-3.5.3.tar.gz +ab0ef21ddebe09d1803575120d3f99f8 packaging-25.0.tar.gz +149327a021d41c8f88d034eab41c039f patch-2.8.tar.xz +641f99b635ebb9332a9b6a8ce8e2f3cf pcre2-10.46.tar.bz2 +7a6950a9f12d01eb96a9d2ed2f4e0072 perl-5.42.0.tar.xz +3291128c917fdb8fccd8c9e7784b643b pkgconf-2.5.1.tar.xz +90803e64f51f192f3325d25c3335d057 procps-ng-4.0.5.tar.xz +53eae841735189a896d614cba440eb10 psmisc-23.7.tar.xz +256cdb3bbf45cdce7499e52ba6c36ea3 Python-3.13.7.tar.xz +b84c0d81b2758398bb7f5b7411d3d908 python-3.13.7-docs-html.tar.bz2 +25a73bfb2a3ad7146c5e9d4408d9f6cd readline-8.3.tar.gz +6aac9b2dbafcd5b7a67a8a9bcb8036c3 sed-4.9.tar.xz +82e1d67883b713f9493659b50d13b436 setuptools-80.9.0.tar.gz +30ef46f54363db1d624587be68794ef2 shadow-4.18.0.tar.xz +d74bbdca4ab1b2bd46d3b3f8dbb0f3db sqlite-autoconf-3500400.tar.gz +63a62af5b35913459954e6e66876f2b8 sqlite-doc-3500400.tar.xz +af60786956a2dc84054fbf46652e515e sysklogd-2.7.2.tar.gz +25fe5d328e22641254761f1baa74cee0 systemd-257.8.tar.gz +a44063e2ec0cf4adfd2ed5c9e9e095c5 systemd-man-pages-257.8.tar.xz +bc6890b975d19dc9db42d0c7364dd092 sysvinit-3.14.tar.xz +a2d8042658cfd8ea939e6d911eaf4152 tar-1.35.tar.xz +1ec3444533f54d0f86cd120058e15e48 tcl8.6.17-src.tar.gz +60c71044e723b0db5f21be82929f3534 tcl8.6.17-html.tar.gz +11939a7624572814912a18e76c8d8972 texinfo-7.2.tar.xz +ad65154c48c74a9b311fe84778c5434f tzdata2025b.tar.gz +acd4360d8a5c3ef320b9db88d275dae6 udev-lfs-20230818.tar.xz +a2a3281ce76821c4bc28794fdf9d3994 util-linux-2.41.2.tar.xz +e72f31be182f1ccf4b66bef46ac1e60e vim-9.1.1806.tar.gz +65e09ee84af36821e3b1e9564aa91bd5 wheel-0.46.1.tar.gz +89a8e82cfd2ad948b349c0a69c494463 XML-Parser-2.47.tar.gz +cf5e1feb023d22c6bdaa30e84ef3abe3 xz-5.8.1.tar.xz +9855b6d802d7fe5b7bd5b196a2271655 zlib-1.3.1.tar.gz +780fc1896922b1bc52a4e90980cdda48 zstd-1.5.7.tar.gz +6a5ac7e89b791aae556de0f745916f7f bzip2-1.0.8-install_docs-1.patch +c800540039fb0707954197486b1bde70 coreutils-9.8-i18n-2.patch +0ca4d6bb8d572fbcdb13cb36cd34833e expect-5.45.4-gcc15-1.patch +9a5997c3452909b1769918c759eff8a2 glibc-2.42-fhs-1.patch +f75cca16a38da6caa7d52151f7136895 kbd-2.9.0-backspace-1.patch +3af8fd8e13cad481eeeaa48be4247445 sysvinit-3.14-consolidated-1.patch diff --git a/ai/metadata/cache/mlfs-wget-list.txt b/ai/metadata/cache/mlfs-wget-list.txt new file mode 100644 index 0000000..127aa84 --- /dev/null +++ b/ai/metadata/cache/mlfs-wget-list.txt @@ -0,0 
+1,97 @@ +https://download.savannah.gnu.org/releases/acl/acl-2.3.2.tar.xz +https://download.savannah.gnu.org/releases/attr/attr-2.5.2.tar.gz +https://ftp.gnu.org/gnu/autoconf/autoconf-2.72.tar.xz +https://ftp.gnu.org/gnu/automake/automake-1.18.1.tar.xz +https://ftp.gnu.org/gnu/bash/bash-5.3.tar.gz +https://github.com/gavinhoward/bc/releases/download/7.0.3/bc-7.0.3.tar.xz +https://sourceware.org/pub/binutils/releases/binutils-2.45.tar.xz +https://ftp.gnu.org/gnu/bison/bison-3.8.2.tar.xz +https://www.sourceware.org/pub/bzip2/bzip2-1.0.8.tar.gz +https://ftp.gnu.org/gnu/coreutils/coreutils-9.8.tar.xz +https://ftp.gnu.org/gnu/dejagnu/dejagnu-1.6.3.tar.gz +https://ftp.gnu.org/gnu/diffutils/diffutils-3.12.tar.xz +https://downloads.sourceforge.net/project/e2fsprogs/e2fsprogs/v1.47.3/e2fsprogs-1.47.3.tar.gz +https://sourceware.org/ftp/elfutils/0.193/elfutils-0.193.tar.bz2 +https://github.com/libexpat/libexpat/releases/download/R_2_7_3/expat-2.7.3.tar.xz +https://prdownloads.sourceforge.net/expect/expect5.45.4.tar.gz +https://astron.com/pub/file/file-5.46.tar.gz +https://ftp.gnu.org/gnu/findutils/findutils-4.10.0.tar.xz +https://github.com/westes/flex/releases/download/v2.6.4/flex-2.6.4.tar.gz +https://pypi.org/packages/source/f/flit-core/flit_core-3.12.0.tar.gz +https://ftp.gnu.org/gnu/gawk/gawk-5.3.2.tar.xz +https://ftp.gnu.org/gnu/gcc/gcc-15.2.0/gcc-15.2.0.tar.xz +https://ftp.gnu.org/gnu/gdbm/gdbm-1.26.tar.gz +https://ftp.gnu.org/gnu/gettext/gettext-0.26.tar.xz +https://ftp.gnu.org/gnu/glibc/glibc-2.42.tar.xz +https://ftp.gnu.org/gnu/gmp/gmp-6.3.0.tar.xz +https://ftp.gnu.org/gnu/gperf/gperf-3.3.tar.gz +https://ftp.gnu.org/gnu/grep/grep-3.12.tar.xz +https://ftp.gnu.org/gnu/groff/groff-1.23.0.tar.gz +https://ftp.gnu.org/gnu/grub/grub-2.12.tar.xz +https://ftp.gnu.org/gnu/gzip/gzip-1.14.tar.xz +https://github.com/Mic92/iana-etc/releases/download/20250926/iana-etc-20250926.tar.gz +https://ftp.gnu.org/gnu/inetutils/inetutils-2.6.tar.xz +https://launchpad.net/intltool/trunk/0.51.0/+download/intltool-0.51.0.tar.gz +https://www.kernel.org/pub/linux/utils/net/iproute2/iproute2-6.16.0.tar.xz +https://libisl.sourceforge.io/isl-0.27.tar.xz +https://pypi.org/packages/source/J/Jinja2/jinja2-3.1.6.tar.gz +https://www.kernel.org/pub/linux/utils/kbd/kbd-2.9.0.tar.xz +https://www.kernel.org/pub/linux/utils/kernel/kmod/kmod-34.2.tar.xz +https://www.greenwoodsoftware.com/less/less-679.tar.gz +https://www.linuxfromscratch.org/lfs/downloads/development/lfs-bootscripts-20250827.tar.xz +https://www.kernel.org/pub/linux/libs/security/linux-privs/libcap2/libcap-2.76.tar.xz +https://github.com/libffi/libffi/releases/download/v3.5.2/libffi-3.5.2.tar.gz +https://download.savannah.gnu.org/releases/libpipeline/libpipeline-1.5.8.tar.gz +https://ftp.gnu.org/gnu/libtool/libtool-2.5.4.tar.xz +https://github.com/besser82/libxcrypt/releases/download/v4.4.38/libxcrypt-4.4.38.tar.xz +https://www.kernel.org/pub/linux/kernel/v6.x/linux-6.16.9.tar.xz +https://github.com/lz4/lz4/releases/download/v1.10.0/lz4-1.10.0.tar.gz +https://ftp.gnu.org/gnu/m4/m4-1.4.20.tar.xz +https://ftp.gnu.org/gnu/make/make-4.4.1.tar.gz +https://download.savannah.gnu.org/releases/man-db/man-db-2.13.1.tar.xz +https://www.kernel.org/pub/linux/docs/man-pages/man-pages-6.15.tar.xz +https://pypi.org/packages/source/M/MarkupSafe/markupsafe-3.0.3.tar.gz +https://github.com/mesonbuild/meson/releases/download/1.9.1/meson-1.9.1.tar.gz +https://ftp.gnu.org/gnu/mpc/mpc-1.3.1.tar.gz +https://ftp.gnu.org/gnu/mpfr/mpfr-4.2.2.tar.xz 
+https://invisible-mirror.net/archives/ncurses/current/ncurses-6.5-20250809.tgz +https://github.com/ninja-build/ninja/archive/v1.13.1/ninja-1.13.1.tar.gz +https://github.com/openssl/openssl/releases/download/openssl-3.5.3/openssl-3.5.3.tar.gz +https://files.pythonhosted.org/packages/source/p/packaging/packaging-25.0.tar.gz +https://ftp.gnu.org/gnu/patch/patch-2.8.tar.xz +https://github.com/PCRE2Project/pcre2/releases/download/pcre2-10.46/pcre2-10.46.tar.bz2 +https://www.cpan.org/src/5.0/perl-5.42.0.tar.xz +https://distfiles.ariadne.space/pkgconf/pkgconf-2.5.1.tar.xz +https://sourceforge.net/projects/procps-ng/files/Production/procps-ng-4.0.5.tar.xz +https://sourceforge.net/projects/psmisc/files/psmisc/psmisc-23.7.tar.xz +https://www.python.org/ftp/python/3.13.7/Python-3.13.7.tar.xz +https://www.python.org/ftp/python/doc/3.13.7/python-3.13.7-docs-html.tar.bz2 +https://ftp.gnu.org/gnu/readline/readline-8.3.tar.gz +https://ftp.gnu.org/gnu/sed/sed-4.9.tar.xz +https://pypi.org/packages/source/s/setuptools/setuptools-80.9.0.tar.gz +https://github.com/shadow-maint/shadow/releases/download/4.18.0/shadow-4.18.0.tar.xz +https://sqlite.org/2025/sqlite-autoconf-3500400.tar.gz +https://anduin.linuxfromscratch.org/LFS/sqlite-doc-3500400.tar.xz +https://github.com/troglobit/sysklogd/releases/download/v2.7.2/sysklogd-2.7.2.tar.gz +https://github.com/systemd/systemd/archive/v257.8/systemd-257.8.tar.gz +https://anduin.linuxfromscratch.org/LFS/systemd-man-pages-257.8.tar.xz +https://github.com/slicer69/sysvinit/releases/download/3.14/sysvinit-3.14.tar.xz +https://ftp.gnu.org/gnu/tar/tar-1.35.tar.xz +https://downloads.sourceforge.net/tcl/tcl8.6.17-src.tar.gz +https://downloads.sourceforge.net/tcl/tcl8.6.17-html.tar.gz +https://ftp.gnu.org/gnu/texinfo/texinfo-7.2.tar.xz +https://www.iana.org/time-zones/repository/releases/tzdata2025b.tar.gz +https://anduin.linuxfromscratch.org/LFS/udev-lfs-20230818.tar.xz +https://www.kernel.org/pub/linux/utils/util-linux/v2.41/util-linux-2.41.2.tar.xz +https://github.com/vim/vim/archive/v9.1.1806/vim-9.1.1806.tar.gz +https://pypi.org/packages/source/w/wheel/wheel-0.46.1.tar.gz +https://cpan.metacpan.org/authors/id/T/TO/TODDR/XML-Parser-2.47.tar.gz +https://github.com//tukaani-project/xz/releases/download/v5.8.1/xz-5.8.1.tar.xz +https://zlib.net/fossils/zlib-1.3.1.tar.gz +https://github.com/facebook/zstd/releases/download/v1.5.7/zstd-1.5.7.tar.gz +https://www.linuxfromscratch.org/patches/lfs/development/bzip2-1.0.8-install_docs-1.patch +https://www.linuxfromscratch.org/patches/lfs/development/coreutils-9.8-i18n-2.patch +https://www.linuxfromscratch.org/patches/lfs/development/expect-5.45.4-gcc15-1.patch +https://www.linuxfromscratch.org/patches/lfs/development/glibc-2.42-fhs-1.patch +https://www.linuxfromscratch.org/patches/lfs/development/kbd-2.9.0-backspace-1.patch +https://www.linuxfromscratch.org/patches/lfs/development/sysvinit-3.14-consolidated-1.patch diff --git a/ai/metadata/index.json b/ai/metadata/index.json new file mode 100644 index 0000000..684b1d8 --- /dev/null +++ b/ai/metadata/index.json @@ -0,0 +1,16 @@ +{ + "generated_at": "2025-10-01T04:35:27.106227+00:00", + "packages": [ + { + "book": "mlfs", + "id": "mlfs/binutils/pass1", + "name": "Binutils", + "path": "packages/mlfs/binutils-pass1.json", + "stage": "cross-toolchain", + "status": "draft", + "variant": "Pass 1", + "version": "2.45" + } + ], + "schema_version": "v0.1.0" +} \ No newline at end of file diff --git a/ai/metadata/packages/mlfs/binutils-pass-1.json 
b/ai/metadata/packages/mlfs/binutils-pass-1.json new file mode 100644 index 0000000..21e4382 --- /dev/null +++ b/ai/metadata/packages/mlfs/binutils-pass-1.json @@ -0,0 +1,113 @@ +{ + "artifacts": { + "disk": 678, + "install_prefix": null, + "sbu": 1.0 + }, + "build": [ + { + "commands": [ + "mkdir -v build", + "cd build" + ], + "cwd": null, + "notes": null, + "phase": "setup", + "requires_root": false + }, + { + "commands": [ + "../configure --prefix=$LFS/tools \\", + "--with-sysroot=$LFS \\", + "--target=$LFS_TGT \\", + "--disable-nls \\", + "--enable-gprofng=no \\", + "--disable-werror \\", + "--enable-new-dtags \\", + "--enable-default-hash-style=gnu" + ], + "cwd": null, + "notes": null, + "phase": "configure", + "requires_root": false + }, + { + "commands": [ + "make" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + }, + { + "commands": [ + "make install" + ], + "cwd": null, + "notes": null, + "phase": "install", + "requires_root": false + } + ], + "dependencies": { + "build": [], + "runtime": [] + }, + "environment": { + "users": [], + "variables": [] + }, + "optimizations": { + "cflags": [ + "-O3", + "-flto" + ], + "enable_lto": true, + "enable_pgo": true, + "ldflags": [ + "-flto" + ], + "profdata": null + }, + "package": { + "anchors": { + "section": "https://linuxfromscratch.org/~thomas/multilib-m32/chapter05/binutils-pass1.html#ch-tools-binutils-pass1" + }, + "book": "mlfs", + "chapter": 5, + "id": "mlfs/binutils-pass-1", + "name": "Binutils", + "section": "5.2", + "stage": "cross-toolchain", + "upstream": null, + "variant": "Pass 1", + "version": "2.45" + }, + "provenance": { + "book_release": "lfs-ml-12.4-40-multilib", + "content_hash": "7c580aad04933a2f6ec5e5410a57695dd2d0b76a293212f33fd3edd226490853", + "page_url": "https://linuxfromscratch.org/~thomas/multilib-m32/chapter05/binutils-pass1.html", + "retrieved_at": "2025-10-01T04:57:22.375928+00:00" + }, + "schema_version": "v0.1.0", + "source": { + "archive": "binutils-2.45.tar.xz", + "checksums": [ + { + "alg": "md5", + "value": "dee5b4267e0305a99a3c9d6131f45759" + } + ], + "urls": [ + { + "kind": "primary", + "url": "https://sourceware.org/pub/binutils/releases/binutils-2.45.tar.xz" + } + ] + }, + "status": { + "issues": [], + "state": "draft" + } +} \ No newline at end of file diff --git a/ai/metadata/packages/mlfs/binutils-pass1.json b/ai/metadata/packages/mlfs/binutils-pass1.json new file mode 100644 index 0000000..303ca2d --- /dev/null +++ b/ai/metadata/packages/mlfs/binutils-pass1.json @@ -0,0 +1,147 @@ +{ + "schema_version": "v0.1.0", + "package": { + "id": "mlfs/binutils/pass1", + "name": "Binutils", + "upstream": "gnu/binutils", + "version": "2.45", + "book": "mlfs", + "chapter": 5, + "section": "5.02", + "stage": "cross-toolchain", + "variant": "Pass 1", + "anchors": { + "section": "https://linuxfromscratch.org/~thomas/multilib-m32/chapter05/binutils-pass1.html#ch-tools-binutils-pass1" + } + }, + "source": { + "urls": [ + { + "url": "https://ftp.gnu.org/gnu/binutils/binutils-2.45.tar.xz", + "kind": "primary" + }, + { + "url": "https://ftpmirror.gnu.org/binutils/binutils-2.45.tar.xz", + "kind": "mirror" + } + ], + "archive": "binutils-2.45.tar.xz" + }, + "artifacts": { + "sbu": 1, + "disk": 678, + "install_prefix": "$LFS/tools" + }, + "dependencies": { + "build": [ + { "name": "bash" }, + { "name": "coreutils" }, + { "name": "gcc", "optional": true } + ], + "runtime": [] + }, + "environment": { + "variables": [ + { + "name": "LFS", + "description": "Absolute path to mounted LFS 
workspace" + }, + { + "name": "LFS_TGT", + "description": "Target triple for cross toolchain" + } + ], + "users": [] + }, + "build": [ + { + "phase": "setup", + "commands": [ + "tar -xf binutils-2.45.tar.xz", + "cd binutils-2.45", + "mkdir -v build", + "cd build" + ] + }, + { + "phase": "configure", + "commands": [ + "../configure --prefix=$LFS/tools \\", + " --with-sysroot=$LFS \\", + " --target=$LFS_TGT \\", + " --disable-nls \\", + " --enable-gprofng=no \\", + " --disable-werror \\", + " --enable-new-dtags \\", + " --enable-default-hash-style=gnu" + ], + "cwd": "build" + }, + { + "phase": "build", + "commands": [ + "make" + ], + "cwd": "build" + }, + { + "phase": "test", + "commands": [ + "make -k check" + ], + "cwd": "build", + "notes": "Tests are optional for cross-toolchain; failures can be ignored" + }, + { + "phase": "install", + "commands": [ + "make install" + ], + "cwd": "build" + } + ], + "optimizations": { + "enable_lto": true, + "enable_pgo": true, + "cflags": ["-O3", "-flto", "-fprofile-generate"], + "ldflags": ["-flto", "-fprofile-generate"], + "profdata": null + }, + "tests": [ + { + "commands": [ + "make -k check" + ], + "optional": true, + "expected_failures": ["gas/run/elf-x86-64-reloc.sh"] + } + ], + "post_install": [ + { + "commands": [ + "rm -v $LFS/tools/lib/libbfd.a", + "rm -v $LFS/tools/lib/libctf-nobfd.a" + ], + "description": "Remove static libraries per LFS guidance" + } + ], + "notes": [ + { + "severity": "warning", + "text": "Ensure the host uses recent flex/bison to avoid configure warnings." + } + ], + "provenance": { + "book_release": "ml-12.4-40-multilib", + "page_url": "https://linuxfromscratch.org/~thomas/multilib-m32/chapter05/binutils-pass1.html", + "retrieved_at": "2025-03-09T00:00:00Z", + "content_hash": "0000000000000000000000000000000000000000000000000000000000000000" + }, + "status": { + "state": "draft", + "issues": [ + "Checksums not yet verified", + "Dependency list requires confirmation" + ] + } +} diff --git a/ai/metadata/schema.json b/ai/metadata/schema.json new file mode 100644 index 0000000..3fae44b --- /dev/null +++ b/ai/metadata/schema.json @@ -0,0 +1,377 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://lpkg.dev/schemas/package-metadata.json", + "title": "LPKG Package Metadata", + "type": "object", + "required": ["schema_version", "package", "source", "build", "provenance", "status"], + "additionalProperties": false, + "properties": { + "schema_version": { + "type": "string", + "pattern": "^v\\d+\\.\\d+\\.\\d+$" + }, + "package": { + "type": "object", + "required": ["id", "name", "version", "book"], + "additionalProperties": false, + "properties": { + "id": { + "type": "string", + "pattern": "^[a-z0-9][a-z0-9-/]*$" + }, + "name": { + "type": "string", + "minLength": 1 + }, + "upstream": { + "type": "string" + }, + "version": { + "type": "string", + "minLength": 1 + }, + "book": { + "type": "string", + "enum": ["lfs", "mlfs", "blfs", "glfs"] + }, + "chapter": { + "type": ["integer", "null"], + "minimum": 0 + }, + "section": { + "type": ["string", "null"], + "pattern": "^\\d+\\.\\d+$" + }, + "stage": { + "type": ["string", "null"], + "enum": [ + "cross-toolchain", + "temporary-tools", + "system", + "system-configuration", + "system-finalization", + "desktop", + "server", + "multilib", + "kernel", + "boot", + null + ] + }, + "variant": { + "type": ["string", "null"], + "minLength": 1 + }, + "anchors": { + "type": "object", + "additionalProperties": { + "type": "string", + "format": "uri" + } + } + } + }, 
+ "source": { + "type": "object", + "required": ["urls"], + "additionalProperties": false, + "properties": { + "urls": { + "type": "array", + "items": { + "type": "object", + "required": ["url"], + "additionalProperties": false, + "properties": { + "url": { + "type": "string", + "format": "uri" + }, + "kind": { + "type": "string", + "enum": ["primary", "mirror", "patch", "signature"] + } + } + } + }, + "archive": { + "type": ["string", "null"] + }, + "checksums": { + "type": "array", + "items": { + "type": "object", + "required": ["alg", "value"], + "additionalProperties": false, + "properties": { + "alg": { + "type": "string", + "enum": ["md5", "sha1", "sha256", "sha512"] + }, + "value": { + "type": "string", + "pattern": "^[A-Fa-f0-9]{16,128}$" + } + } + } + } + } + }, + "artifacts": { + "type": "object", + "additionalProperties": false, + "properties": { + "sbu": { + "type": ["number", "null"], + "minimum": 0 + }, + "disk": { + "type": ["integer", "null"], + "minimum": 0, + "description": "Approximate disk usage in MB" + }, + "install_prefix": { + "type": ["string", "null"], + "minLength": 1 + } + } + }, + "dependencies": { + "type": "object", + "additionalProperties": false, + "properties": { + "build": { + "type": "array", + "items": { + "type": "object", + "required": ["name"], + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "minLength": 1 + }, + "optional": { + "type": "boolean", + "default": false + } + } + } + }, + "runtime": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + } + } + } + }, + "environment": { + "type": "object", + "additionalProperties": false, + "properties": { + "variables": { + "type": "array", + "items": { + "type": "object", + "required": ["name"], + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "pattern": "^[A-Z0-9_]+$" + }, + "required": { + "type": "boolean", + "default": true + }, + "description": { + "type": "string" + } + } + } + }, + "users": { + "type": "array", + "items": { + "type": "object", + "required": ["name"], + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "minLength": 1 + }, + "purpose": { + "type": "string" + } + } + } + } + } + }, + "build": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "required": ["phase", "commands"], + "additionalProperties": false, + "properties": { + "phase": { + "type": "string", + "enum": ["setup", "configure", "build", "test", "install", "post"] + }, + "commands": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + } + }, + "cwd": { + "type": ["string", "null"], + "minLength": 1 + }, + "requires_root": { + "type": "boolean", + "default": false + }, + "notes": { + "type": ["string", "null"], + "minLength": 1 + } + } + } + }, + "optimizations": { + "type": "object", + "additionalProperties": false, + "properties": { + "enable_lto": { + "type": "boolean" + }, + "enable_pgo": { + "type": "boolean" + }, + "cflags": { + "type": "array", + "items": { + "type": "string" + } + }, + "ldflags": { + "type": "array", + "items": { + "type": "string" + } + }, + "profdata": { + "type": ["string", "null"], + "minLength": 1 + } + } + }, + "tests": { + "type": "array", + "items": { + "type": "object", + "required": ["commands"], + "additionalProperties": false, + "properties": { + "commands": { + "type": "array", + "items": { + "type": "string" + } + }, + "optional": { + "type": "boolean" + }, + "expected_failures": { + "type": "array", + "items": { + 
"type": "string" + } + } + } + } + }, + "post_install": { + "type": "array", + "items": { + "type": "object", + "required": ["commands"], + "additionalProperties": false, + "properties": { + "commands": { + "type": "array", + "items": { + "type": "string" + } + }, + "description": { + "type": ["string", "null"] + } + } + } + }, + "notes": { + "type": "array", + "items": { + "type": "object", + "required": ["text"], + "additionalProperties": false, + "properties": { + "severity": { + "type": "string", + "enum": ["info", "warning", "error"] + }, + "text": { + "type": "string" + } + } + } + }, + "provenance": { + "type": "object", + "required": ["book_release", "page_url", "retrieved_at"], + "additionalProperties": false, + "properties": { + "book_release": { + "type": "string" + }, + "page_url": { + "type": "string", + "format": "uri" + }, + "retrieved_at": { + "type": "string", + "format": "date-time" + }, + "content_hash": { + "type": "string", + "pattern": "^[A-Fa-f0-9]{64}$" + } + } + }, + "status": { + "type": "object", + "required": ["state"], + "additionalProperties": false, + "properties": { + "state": { + "type": "string", + "enum": ["draft", "review", "imported", "stale"] + }, + "issues": { + "type": "array", + "items": { + "type": "string" + } + } + } + } + } +} diff --git a/ai/notes.md b/ai/notes.md new file mode 100644 index 0000000..fd2e428 --- /dev/null +++ b/ai/notes.md @@ -0,0 +1,15 @@ +# Integrating jhalfs Source Metadata + +- Goal: reuse jhalfs wget-list and md5sums to populate package `source.urls` and +auto-fill checksums when harvesting metadata for MLFS/BLFS/GLFS packages. +- Data source: `https://anduin.linuxfromscratch.org/` hosts per-release + `wget-list`/`md5sums` files already curated by the jhalfs project. +- Approach: + 1. Fetch (and optionally cache under `ai/cache/`) the lists for each book. + 2. When harvesting, map `-` against the list to gather all + relevant URLs. + 3. Pull matching checksum entries to populate `source.checksums`. + 4. Keep the existing HTML scrape for chapter/stage text; jhalfs covers only + sources. +- Benefits: avoids fragile HTML tables, keeps URLs aligned with official build + scripts, and ensures checksums are up-to-date. diff --git a/ai/personas.json b/ai/personas.json new file mode 100644 index 0000000..765b0b4 --- /dev/null +++ b/ai/personas.json @@ -0,0 +1,24 @@ +[ + { + "id": "default_cli", + "name": "Codex CLI Assistant", + "description": "Default persona for repository automation; focuses on safe refactors and tooling improvements.", + "strengths": [ + "Rust and tooling pipelines", + "Workflow automation", + "Incremental migrations" + ], + "notes": "Derived from GPT-5 Codex runtime; avoids destructive operations without explicit approval." + }, + { + "id": "mlfs_researcher", + "name": "MLFS Researcher", + "description": "Persona dedicated to tracking Multilib Linux From Scratch package metadata and translating it into lpkg modules.", + "strengths": [ + "HTML scraping", + "Package manifest synthesis", + "Optimization flag tuning" + ], + "notes": "Activated when working with https://linuxfromscratch.org/~thomas/multilib-m32/ resources." 
+ } +] diff --git a/ai/tasks.json b/ai/tasks.json new file mode 100644 index 0000000..abb56e2 --- /dev/null +++ b/ai/tasks.json @@ -0,0 +1,56 @@ +{ + "generated_at": "2025-03-09T00:00:00Z", + "unfinished": [ + { + "id": "mlfs-package-import", + "title": "Import all MLFS packages into lpkg", + "description": "Parse the Multilib LFS book and scaffold package definitions with optimization defaults (LTO/PGO/-O3).", + "blocked_on": [ + "Implement automated parser" + ], + "owner": "mlfs_researcher" + }, + { + "id": "pgo-integration", + "title": "Integrate profile guided optimization support", + "description": "Add infrastructure for collection and replay of profiling data during package builds.", + "blocked_on": [ + "Decide on profiling workload definitions" + ], + "owner": "default_cli" + }, + { + "id": "lfs-html-parsers", + "title": "Automate LFS/BLFS/GLFS ingest via HTML parsing", + "description": "Avoid hardcoded package data; download the upstream books (LFS, BLFS, GLFS) and parse them to drive scaffolding and metadata updates.", + "blocked_on": [ + "Design resilient scraping strategies for each book", + "Implement incremental update workflow" + ], + "owner": "mlfs_researcher" + } + ], + "solved": [ + { + "id": "ai-metadata-store", + "title": "Create AI metadata directory", + "description": "Introduce ai/personas.json, ai/tasks.json, ai/bugs.json for persistent assistant context.", + "resolution": "Initial JSON files checked in with placeholder content.", + "owner": "default_cli" + }, + { + "id": "metadata-schema-v0.1", + "title": "Define package metadata schema", + "description": "Specify JSON schema and layout for storing scraped package detail from LFS family books.", + "resolution": "Added ai/metadata/schema.json with v0.1.0 structure and seeded initial package entry/index.", + "owner": "default_cli" + }, + { + "id": "metadata-indexer-cli", + "title": "Build metadata validation/indexing tool", + "description": "Provide a standalone CLI to validate package metadata against the schema and regenerate ai/metadata/index.json.", + "resolution": "Added src/bin/metadata_indexer.rs with schema validation, summary extraction, and index writer integration.", + "owner": "default_cli" + } + ] +} diff --git a/data/mlfs_ml-12.4-40-multilib.json b/data/mlfs_ml-12.4-40-multilib.json new file mode 100644 index 0000000..a5d88e9 --- /dev/null +++ b/data/mlfs_ml-12.4-40-multilib.json @@ -0,0 +1,1019 @@ +[ + { + "chapter": 5, + "name": "binutils", + "notes": null, + "section": "Binutils-2.45 - Pass 1", + "stage": "cross-toolchain", + "variant": "pass1", + "version": "2.45" + }, + { + "chapter": 5, + "name": "gcc", + "notes": null, + "section": "GCC-15.2.0 - Pass 1", + "stage": "cross-toolchain", + "variant": "pass1", + "version": "15.2.0" + }, + { + "chapter": 5, + "name": "linux-api-headers", + "notes": null, + "section": "Linux-6.16.9 API Headers", + "stage": "cross-toolchain", + "variant": null, + "version": "6.16.9" + }, + { + "chapter": 5, + "name": "glibc", + "notes": "Temporary toolchain", + "section": "Glibc-2.42", + "stage": "cross-toolchain", + "variant": null, + "version": "2.42" + }, + { + "chapter": 5, + "name": "libstdc++", + "notes": null, + "section": "Libstdc++ from GCC-15.2.0", + "stage": "cross-toolchain", + "variant": null, + "version": "15.2.0" + }, + { + "chapter": 6, + "name": "m4", + "notes": null, + "section": "M4-1.4.20", + "stage": "temporary-tools", + "variant": null, + "version": "1.4.20" + }, + { + "chapter": 6, + "name": "ncurses", + "notes": null, + "section": 
"Ncurses-6.5-20250809", + "stage": "temporary-tools", + "variant": null, + "version": "6.5-20250809" + }, + { + "chapter": 6, + "name": "bash", + "notes": null, + "section": "Bash-5.3", + "stage": "temporary-tools", + "variant": null, + "version": "5.3" + }, + { + "chapter": 6, + "name": "coreutils", + "notes": null, + "section": "Coreutils-9.8", + "stage": "temporary-tools", + "variant": null, + "version": "9.8" + }, + { + "chapter": 6, + "name": "diffutils", + "notes": null, + "section": "Diffutils-3.12", + "stage": "temporary-tools", + "variant": null, + "version": "3.12" + }, + { + "chapter": 6, + "name": "file", + "notes": null, + "section": "File-5.46", + "stage": "temporary-tools", + "variant": null, + "version": "5.46" + }, + { + "chapter": 6, + "name": "findutils", + "notes": null, + "section": "Findutils-4.10.0", + "stage": "temporary-tools", + "variant": null, + "version": "4.10.0" + }, + { + "chapter": 6, + "name": "gawk", + "notes": null, + "section": "Gawk-5.3.2", + "stage": "temporary-tools", + "variant": null, + "version": "5.3.2" + }, + { + "chapter": 6, + "name": "grep", + "notes": null, + "section": "Grep-3.12", + "stage": "temporary-tools", + "variant": null, + "version": "3.12" + }, + { + "chapter": 6, + "name": "gzip", + "notes": null, + "section": "Gzip-1.14", + "stage": "temporary-tools", + "variant": null, + "version": "1.14" + }, + { + "chapter": 6, + "name": "make", + "notes": null, + "section": "Make-4.4.1", + "stage": "temporary-tools", + "variant": null, + "version": "4.4.1" + }, + { + "chapter": 6, + "name": "patch", + "notes": null, + "section": "Patch-2.8", + "stage": "temporary-tools", + "variant": null, + "version": "2.8" + }, + { + "chapter": 6, + "name": "sed", + "notes": null, + "section": "Sed-4.9", + "stage": "temporary-tools", + "variant": null, + "version": "4.9" + }, + { + "chapter": 6, + "name": "tar", + "notes": null, + "section": "Tar-1.35", + "stage": "temporary-tools", + "variant": null, + "version": "1.35" + }, + { + "chapter": 6, + "name": "xz", + "notes": null, + "section": "Xz-5.8.1", + "stage": "temporary-tools", + "variant": null, + "version": "5.8.1" + }, + { + "chapter": 6, + "name": "binutils", + "notes": null, + "section": "Binutils-2.45 - Pass 2", + "stage": "temporary-tools", + "variant": "pass2", + "version": "2.45" + }, + { + "chapter": 6, + "name": "gcc", + "notes": null, + "section": "GCC-15.2.0 - Pass 2", + "stage": "temporary-tools", + "variant": "pass2", + "version": "15.2.0" + }, + { + "chapter": 7, + "name": "gettext", + "notes": null, + "section": "Gettext-0.26", + "stage": "temporary-tools", + "variant": null, + "version": "0.26" + }, + { + "chapter": 7, + "name": "bison", + "notes": null, + "section": "Bison-3.8.2", + "stage": "temporary-tools", + "variant": null, + "version": "3.8.2" + }, + { + "chapter": 7, + "name": "perl", + "notes": null, + "section": "Perl-5.42.0", + "stage": "temporary-tools", + "variant": null, + "version": "5.42.0" + }, + { + "chapter": 7, + "name": "python", + "notes": null, + "section": "Python-3.13.7", + "stage": "temporary-tools", + "variant": null, + "version": "3.13.7" + }, + { + "chapter": 7, + "name": "texinfo", + "notes": null, + "section": "Texinfo-7.2", + "stage": "temporary-tools", + "variant": null, + "version": "7.2" + }, + { + "chapter": 7, + "name": "util-linux", + "notes": null, + "section": "Util-linux-2.41.2", + "stage": "temporary-tools", + "variant": null, + "version": "2.41.2" + }, + { + "chapter": 8, + "name": "man-pages", + "notes": null, + "section": "Man-pages-6.15", 
+ "stage": "system", + "variant": null, + "version": "6.15" + }, + { + "chapter": 8, + "name": "iana-etc", + "notes": null, + "section": "Iana-Etc-20250926", + "stage": "system", + "variant": null, + "version": "20250926" + }, + { + "chapter": 8, + "name": "glibc", + "notes": null, + "section": "Glibc-2.42", + "stage": "system", + "variant": null, + "version": "2.42" + }, + { + "chapter": 8, + "name": "zlib", + "notes": null, + "section": "Zlib-1.3.1", + "stage": "system", + "variant": null, + "version": "1.3.1" + }, + { + "chapter": 8, + "name": "bzip2", + "notes": null, + "section": "Bzip2-1.0.8", + "stage": "system", + "variant": null, + "version": "1.0.8" + }, + { + "chapter": 8, + "name": "xz", + "notes": null, + "section": "Xz-5.8.1", + "stage": "system", + "variant": null, + "version": "5.8.1" + }, + { + "chapter": 8, + "name": "lz4", + "notes": null, + "section": "Lz4-1.10.0", + "stage": "system", + "variant": null, + "version": "1.10.0" + }, + { + "chapter": 8, + "name": "zstd", + "notes": null, + "section": "Zstd-1.5.7", + "stage": "system", + "variant": null, + "version": "1.5.7" + }, + { + "chapter": 8, + "name": "file", + "notes": null, + "section": "File-5.46", + "stage": "system", + "variant": null, + "version": "5.46" + }, + { + "chapter": 8, + "name": "readline", + "notes": null, + "section": "Readline-8.3", + "stage": "system", + "variant": null, + "version": "8.3" + }, + { + "chapter": 8, + "name": "pcre2", + "notes": null, + "section": "Pcre2-10.46", + "stage": "system", + "variant": null, + "version": "10.46" + }, + { + "chapter": 8, + "name": "m4", + "notes": null, + "section": "M4-1.4.20", + "stage": "system", + "variant": null, + "version": "1.4.20" + }, + { + "chapter": 8, + "name": "bc", + "notes": null, + "section": "Bc-7.0.3", + "stage": "system", + "variant": null, + "version": "7.0.3" + }, + { + "chapter": 8, + "name": "flex", + "notes": null, + "section": "Flex-2.6.4", + "stage": "system", + "variant": null, + "version": "2.6.4" + }, + { + "chapter": 8, + "name": "tcl", + "notes": null, + "section": "Tcl-8.6.17", + "stage": "system", + "variant": null, + "version": "8.6.17" + }, + { + "chapter": 8, + "name": "expect", + "notes": null, + "section": "Expect-5.45.4", + "stage": "system", + "variant": null, + "version": "5.45.4" + }, + { + "chapter": 8, + "name": "dejagnu", + "notes": null, + "section": "DejaGNU-1.6.3", + "stage": "system", + "variant": null, + "version": "1.6.3" + }, + { + "chapter": 8, + "name": "pkgconf", + "notes": null, + "section": "Pkgconf-2.5.1", + "stage": "system", + "variant": null, + "version": "2.5.1" + }, + { + "chapter": 8, + "name": "binutils", + "notes": null, + "section": "Binutils-2.45", + "stage": "system", + "variant": null, + "version": "2.45" + }, + { + "chapter": 8, + "name": "gmp", + "notes": null, + "section": "GMP-6.3.0", + "stage": "system", + "variant": null, + "version": "6.3.0" + }, + { + "chapter": 8, + "name": "mpfr", + "notes": null, + "section": "MPFR-4.2.2", + "stage": "system", + "variant": null, + "version": "4.2.2" + }, + { + "chapter": 8, + "name": "mpc", + "notes": null, + "section": "MPC-1.3.1", + "stage": "system", + "variant": null, + "version": "1.3.1" + }, + { + "chapter": 8, + "name": "isl", + "notes": null, + "section": "ISL-0.27", + "stage": "system", + "variant": null, + "version": "0.27" + }, + { + "chapter": 8, + "name": "attr", + "notes": null, + "section": "Attr-2.5.2", + "stage": "system", + "variant": null, + "version": "2.5.2" + }, + { + "chapter": 8, + "name": "acl", + "notes": null, + 
"section": "Acl-2.3.2", + "stage": "system", + "variant": null, + "version": "2.3.2" + }, + { + "chapter": 8, + "name": "libcap", + "notes": null, + "section": "Libcap-2.76", + "stage": "system", + "variant": null, + "version": "2.76" + }, + { + "chapter": 8, + "name": "libxcrypt", + "notes": null, + "section": "Libxcrypt-4.4.38", + "stage": "system", + "variant": null, + "version": "4.4.38" + }, + { + "chapter": 8, + "name": "shadow", + "notes": null, + "section": "Shadow-4.18.0", + "stage": "system", + "variant": null, + "version": "4.18.0" + }, + { + "chapter": 8, + "name": "gcc", + "notes": null, + "section": "GCC-15.2.0", + "stage": "system", + "variant": null, + "version": "15.2.0" + }, + { + "chapter": 8, + "name": "ncurses", + "notes": null, + "section": "Ncurses-6.5-20250809", + "stage": "system", + "variant": null, + "version": "6.5-20250809" + }, + { + "chapter": 8, + "name": "sed", + "notes": null, + "section": "Sed-4.9", + "stage": "system", + "variant": null, + "version": "4.9" + }, + { + "chapter": 8, + "name": "psmisc", + "notes": null, + "section": "Psmisc-23.7", + "stage": "system", + "variant": null, + "version": "23.7" + }, + { + "chapter": 8, + "name": "gettext", + "notes": null, + "section": "Gettext-0.26", + "stage": "system", + "variant": null, + "version": "0.26" + }, + { + "chapter": 8, + "name": "bison", + "notes": null, + "section": "Bison-3.8.2", + "stage": "system", + "variant": null, + "version": "3.8.2" + }, + { + "chapter": 8, + "name": "grep", + "notes": null, + "section": "Grep-3.12", + "stage": "system", + "variant": null, + "version": "3.12" + }, + { + "chapter": 8, + "name": "bash", + "notes": null, + "section": "Bash-5.3", + "stage": "system", + "variant": null, + "version": "5.3" + }, + { + "chapter": 8, + "name": "libtool", + "notes": null, + "section": "Libtool-2.5.4", + "stage": "system", + "variant": null, + "version": "2.5.4" + }, + { + "chapter": 8, + "name": "gdbm", + "notes": null, + "section": "GDBM-1.26", + "stage": "system", + "variant": null, + "version": "1.26" + }, + { + "chapter": 8, + "name": "gperf", + "notes": null, + "section": "Gperf-3.3", + "stage": "system", + "variant": null, + "version": "3.3" + }, + { + "chapter": 8, + "name": "expat", + "notes": null, + "section": "Expat-2.7.3", + "stage": "system", + "variant": null, + "version": "2.7.3" + }, + { + "chapter": 8, + "name": "inetutils", + "notes": null, + "section": "Inetutils-2.6", + "stage": "system", + "variant": null, + "version": "2.6" + }, + { + "chapter": 8, + "name": "less", + "notes": null, + "section": "Less-679", + "stage": "system", + "variant": null, + "version": "679" + }, + { + "chapter": 8, + "name": "perl", + "notes": null, + "section": "Perl-5.42.0", + "stage": "system", + "variant": null, + "version": "5.42.0" + }, + { + "chapter": 8, + "name": "xml-parser", + "notes": null, + "section": "XML::Parser-2.47", + "stage": "system", + "variant": null, + "version": "2.47" + }, + { + "chapter": 8, + "name": "intltool", + "notes": null, + "section": "Intltool-0.51.0", + "stage": "system", + "variant": null, + "version": "0.51.0" + }, + { + "chapter": 8, + "name": "autoconf", + "notes": null, + "section": "Autoconf-2.72", + "stage": "system", + "variant": null, + "version": "2.72" + }, + { + "chapter": 8, + "name": "automake", + "notes": null, + "section": "Automake-1.18.1", + "stage": "system", + "variant": null, + "version": "1.18.1" + }, + { + "chapter": 8, + "name": "openssl", + "notes": null, + "section": "OpenSSL-3.5.3", + "stage": "system", + "variant": null, 
+ "version": "3.5.3" + }, + { + "chapter": 8, + "name": "elfutils-libelf", + "notes": null, + "section": "Libelf from Elfutils-0.193", + "stage": "system", + "variant": null, + "version": "0.193" + }, + { + "chapter": 8, + "name": "libffi", + "notes": null, + "section": "Libffi-3.5.2", + "stage": "system", + "variant": null, + "version": "3.5.2" + }, + { + "chapter": 8, + "name": "sqlite", + "notes": null, + "section": "Sqlite-3500400", + "stage": "system", + "variant": null, + "version": "3500400" + }, + { + "chapter": 8, + "name": "python", + "notes": null, + "section": "Python-3.13.7", + "stage": "system", + "variant": null, + "version": "3.13.7" + }, + { + "chapter": 8, + "name": "flit-core", + "notes": null, + "section": "Flit-Core-3.12.0", + "stage": "system", + "variant": null, + "version": "3.12.0" + }, + { + "chapter": 8, + "name": "packaging", + "notes": null, + "section": "Packaging-25.0", + "stage": "system", + "variant": null, + "version": "25.0" + }, + { + "chapter": 8, + "name": "wheel", + "notes": null, + "section": "Wheel-0.46.1", + "stage": "system", + "variant": null, + "version": "0.46.1" + }, + { + "chapter": 8, + "name": "setuptools", + "notes": null, + "section": "Setuptools-80.9.0", + "stage": "system", + "variant": null, + "version": "80.9.0" + }, + { + "chapter": 8, + "name": "ninja", + "notes": null, + "section": "Ninja-1.13.1", + "stage": "system", + "variant": null, + "version": "1.13.1" + }, + { + "chapter": 8, + "name": "meson", + "notes": null, + "section": "Meson-1.9.1", + "stage": "system", + "variant": null, + "version": "1.9.1" + }, + { + "chapter": 8, + "name": "kmod", + "notes": null, + "section": "Kmod-34.2", + "stage": "system", + "variant": null, + "version": "34.2" + }, + { + "chapter": 8, + "name": "coreutils", + "notes": null, + "section": "Coreutils-9.8", + "stage": "system", + "variant": null, + "version": "9.8" + }, + { + "chapter": 8, + "name": "diffutils", + "notes": null, + "section": "Diffutils-3.12", + "stage": "system", + "variant": null, + "version": "3.12" + }, + { + "chapter": 8, + "name": "gawk", + "notes": null, + "section": "Gawk-5.3.2", + "stage": "system", + "variant": null, + "version": "5.3.2" + }, + { + "chapter": 8, + "name": "findutils", + "notes": null, + "section": "Findutils-4.10.0", + "stage": "system", + "variant": null, + "version": "4.10.0" + }, + { + "chapter": 8, + "name": "groff", + "notes": null, + "section": "Groff-1.23.0", + "stage": "system", + "variant": null, + "version": "1.23.0" + }, + { + "chapter": 8, + "name": "grub", + "notes": null, + "section": "GRUB-2.12", + "stage": "system", + "variant": null, + "version": "2.12" + }, + { + "chapter": 8, + "name": "gzip", + "notes": null, + "section": "Gzip-1.14", + "stage": "system", + "variant": null, + "version": "1.14" + }, + { + "chapter": 8, + "name": "iproute2", + "notes": null, + "section": "IPRoute2-6.16.0", + "stage": "system", + "variant": null, + "version": "6.16.0" + }, + { + "chapter": 8, + "name": "kbd", + "notes": null, + "section": "Kbd-2.9.0", + "stage": "system", + "variant": null, + "version": "2.9.0" + }, + { + "chapter": 8, + "name": "libpipeline", + "notes": null, + "section": "Libpipeline-1.5.8", + "stage": "system", + "variant": null, + "version": "1.5.8" + }, + { + "chapter": 8, + "name": "make", + "notes": null, + "section": "Make-4.4.1", + "stage": "system", + "variant": null, + "version": "4.4.1" + }, + { + "chapter": 8, + "name": "patch", + "notes": null, + "section": "Patch-2.8", + "stage": "system", + "variant": null, + "version": 
"2.8" + }, + { + "chapter": 8, + "name": "tar", + "notes": null, + "section": "Tar-1.35", + "stage": "system", + "variant": null, + "version": "1.35" + }, + { + "chapter": 8, + "name": "texinfo", + "notes": null, + "section": "Texinfo-7.2", + "stage": "system", + "variant": null, + "version": "7.2" + }, + { + "chapter": 8, + "name": "vim", + "notes": null, + "section": "Vim-9.1.1806", + "stage": "system", + "variant": null, + "version": "9.1.1806" + }, + { + "chapter": 8, + "name": "markupsafe", + "notes": null, + "section": "MarkupSafe-3.0.3", + "stage": "system", + "variant": null, + "version": "3.0.3" + }, + { + "chapter": 8, + "name": "jinja2", + "notes": null, + "section": "Jinja2-3.1.6", + "stage": "system", + "variant": null, + "version": "3.1.6" + }, + { + "chapter": 8, + "name": "systemd-udev", + "notes": null, + "section": "Udev from Systemd-257.8", + "stage": "system", + "variant": null, + "version": "257.8" + }, + { + "chapter": 8, + "name": "man-db", + "notes": null, + "section": "Man-DB-2.13.1", + "stage": "system", + "variant": null, + "version": "2.13.1" + }, + { + "chapter": 8, + "name": "procps-ng", + "notes": null, + "section": "Procps-ng-4.0.5", + "stage": "system", + "variant": null, + "version": "4.0.5" + }, + { + "chapter": 8, + "name": "util-linux", + "notes": null, + "section": "Util-linux-2.41.2", + "stage": "system", + "variant": null, + "version": "2.41.2" + }, + { + "chapter": 8, + "name": "e2fsprogs", + "notes": null, + "section": "E2fsprogs-1.47.3", + "stage": "system", + "variant": null, + "version": "1.47.3" + }, + { + "chapter": 8, + "name": "sysklogd", + "notes": null, + "section": "Sysklogd-2.7.2", + "stage": "system", + "variant": null, + "version": "2.7.2" + }, + { + "chapter": 8, + "name": "sysvinit", + "notes": null, + "section": "SysVinit-3.14", + "stage": "system", + "variant": null, + "version": "3.14" + }, + { + "chapter": 9, + "name": "lfs-bootscripts", + "notes": null, + "section": "LFS-Bootscripts-20250827", + "stage": "system", + "variant": null, + "version": "20250827" + }, + { + "chapter": 10, + "name": "linux", + "notes": "Final kernel build", + "section": "Linux-6.16.9", + "stage": "system", + "variant": null, + "version": "6.16.9" + } +] diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md new file mode 100644 index 0000000..19f9fb8 --- /dev/null +++ b/docs/ARCHITECTURE.md @@ -0,0 +1,117 @@ +# Architecture Overview + +This project is split into a reusable Rust library crate (`package_management`) +and several binaries that orchestrate day-to-day workflows. The sections below +outline the main entry points and how the supporting modules fit together. + +## CLI entry points + +| Binary | Location | Purpose | +| ------ | -------- | ------- | +| `lpkg` | `src/main.rs` | Primary command-line interface with workflow automation and optional TUI integration. | +| `metadata_indexer` | `src/bin/metadata_indexer.rs` | Harvests LFS/BLFS/GLFS package metadata, validates it against the JSON schema, and keeps `ai/metadata/index.json` up to date. | + +### `lpkg` workflows + +`lpkg` uses [Clap](https://docs.rs/clap) to expose multiple subcommands: + +- `EnvCheck` – fetches `
<pre class="userinput">` blocks from an LFS-style HTML page and runs the
+  embedded `ver_check` / `ver_kernel` scripts.
+- `FetchManifests` – downloads the book’s canonical `wget-list` and `md5sums`
+  files and writes them to disk.
+- `BuildBinutils` – parses the Binutils Pass 1 page, mirrors the documented
+  build steps, and executes them in a Tokio runtime.
+- `ScaffoldPackage` – generates a new module under `src/pkgs/by_name/` with
+  optimisation defaults (LTO/PGO/`-O3`) and persists metadata via the DB
+  helpers.
+- `ImportMlfs` – walks the MLFS catalogue, scaffolding definitions and storing
+  them in the database (with optional `--dry-run`, `--limit`, and `--overwrite`).
+
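+For orientation, here is a minimal sketch of how these subcommands could be
+declared with Clap's derive API. The variant names mirror the list above;
+apart from `--dry-run`, `--limit`, and `--overwrite` on `ImportMlfs`, the
+arguments are illustrative assumptions, not the actual definitions in
+`src/main.rs`.
+
+```rust
+use clap::{Parser, Subcommand};
+
+/// Illustrative sketch only; the real CLI lives in src/main.rs.
+#[derive(Parser)]
+#[command(name = "lpkg")]
+struct Cli {
+    #[command(subcommand)]
+    command: Command,
+}
+
+#[derive(Subcommand)]
+enum Command {
+    /// Run the embedded ver_check / ver_kernel host checks
+    EnvCheck,
+    /// Download the book's wget-list and md5sums manifests
+    FetchManifests,
+    /// Mirror the documented Binutils Pass 1 build steps
+    BuildBinutils,
+    /// Scaffold a new module under src/pkgs/by_name/
+    ScaffoldPackage,
+    /// Import the MLFS catalogue into the database
+    ImportMlfs {
+        #[arg(long)]
+        dry_run: bool,
+        #[arg(long)]
+        limit: Option<usize>,
+        #[arg(long)]
+        overwrite: bool,
+    },
+}
+```
+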
+When compiled with the `tui` feature flag, the CLI also exposes
+`lpkg tui disk-manager`, which drops the user into the terminal UI defined in
+`src/tui/`.
+
+### `metadata_indexer`
+
+The `metadata_indexer` binary is a companion tool for maintaining the JSON
+artifacts under `ai/metadata/`:
+
+- `validate` – validates every `packages/**.json` file against
+  `ai/metadata/schema.json` and reports schema or summary extraction issues.
+- `index` – revalidates the metadata and regenerates
+  `ai/metadata/index.json` (use `--compact` for single-line JSON).
+- `harvest` – fetches a given book page, extracts build metadata, and emits a
+  schema-compliant JSON skeleton. When direct HTML parsing does not locate the
+  source tarball, it falls back to the jhalfs `wget-list` data to populate
+  `source.urls`.
+
+## Module layout
+
+```
+src/
+  ai/             // JSON loaders for repository personas, tasks, and bugs
+  db/             // Diesel database setup and models
+  html.rs         // Lightweight HTML helpers (fetch + parse <pre> blocks)
+  ingest/         // Parsers for LFS / MLFS / BLFS / GLFS book content
+  md5_utils.rs    // Fetches canonical md5sums from the book mirror
+  mirrors.rs      // Lists official source mirrors for downloads
+  pkgs/           // Package scaffolding and metadata definition helpers
+  tui/            // Optional terminal UI (crossterm + tui)
+  version_check.rs// Executes ver_check / ver_kernel snippets
+  wget_list.rs    // Fetches jhalfs-maintained wget-list manifests
+  bin/metadata_indexer.rs // AI metadata CLI described above
+```
+
+### Notable modules
+
+- **`src/pkgs/scaffolder.rs`**
+  - Generates filesystem modules and `PackageDefinition` records based on a
+    `ScaffoldRequest`.
+  - Normalises directory layout (prefix modules, `mod.rs` entries) and applies
+    optimisation defaults (LTO, PGO, `-O3`).
+
+- **`src/ingest/`**
+  - Provides HTML parsers tailored to each book flavour (LFS, MLFS, BLFS,
+    GLFS). The parsers emit `BookPackage` records consumed by the scaffolder
+    and metadata importer (a sketch of the record shape follows this list).
+
+- **`src/db/`**
+  - Diesel models and schema for persisting package metadata. `lpkg` uses these
+    helpers when scaffolding or importing packages.
+
+- **`src/tui/`**
+  - Houses the optional terminal interface (disk manager, main menu, settings,
+    downloader). The entry points are conditionally compiled behind the `tui`
+    cargo feature.
+
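+The exact shape of those records is not spelled out in this overview; as a
+rough sketch (assumed from the entries in
+`data/mlfs_ml-12.4-40-multilib.json`, not the crate's actual definition),
+`BookPackage` carries fields along these lines:
+
+```rust
+use serde::Deserialize;
+
+/// Assumed shape, inferred from data/mlfs_ml-12.4-40-multilib.json.
+#[derive(Debug, Deserialize)]
+pub struct BookPackage {
+    pub chapter: u32,
+    pub name: String,
+    pub notes: Option<String>,
+    pub section: String,
+    pub stage: String,
+    pub variant: Option<String>,
+    pub version: String,
+}
+```
+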
+## Data & metadata assets
+
+The repository keeps long-lived artifacts under `ai/`:
+
+- `ai/metadata/` – JSON schema (`schema.json`), package records, and a generated
+  index (`index.json`). The `metadata_indexer` binary maintains these files.
+- `ai/personas.json`, `ai/tasks.json`, `ai/bugs.json` – contextual data for
+  automated assistance.
+- `ai/notes.md` – scratchpad for future work (e.g., jhalfs integration).
+
+`data/` currently contains catalogues derived from the MLFS book and can be
+extended with additional book snapshots.
+
+## Database and persistence
+
+The Diesel setup uses SQLite (via the `diesel` crate with `sqlite` and `r2d2`
+features enabled). Connection pooling lives in `src/db/mod.rs` and is consumed
+by workflows that scaffold or import packages.
+
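+As a rough illustration of that setup (a sketch under the stated features, not
+the actual contents of `src/db/mod.rs`), an r2d2-backed SQLite pool with
+Diesel is typically wired up like this:
+
+```rust
+use diesel::r2d2::{ConnectionManager, Pool};
+use diesel::sqlite::SqliteConnection;
+
+/// Sketch only; the real pool setup lives in src/db/mod.rs.
+pub type DbPool = Pool<ConnectionManager<SqliteConnection>>;
+
+pub fn establish_pool(database_url: &str) -> DbPool {
+    let manager = ConnectionManager::<SqliteConnection>::new(database_url);
+    Pool::builder()
+        .max_size(4)
+        .build(manager)
+        .expect("failed to build SQLite connection pool")
+}
+```
+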
+## Optional terminal UI
+
+The TUI revolves around `DiskManager` (an interface built on crossterm and tui
+for GPT partition inspection and creation). Additional stubs (`main_menu.rs`,
+`settings.rs`, `downloader.rs`) are present for future expansion. When `lpkg`
+is compiled with `--features tui` and invoked without a subcommand, the CLI
+falls back to `DiskManager::run_tui()`.
+
+---
+
+For more operational details around metadata harvesting, refer to
+[`docs/METADATA_PIPELINE.md`](./METADATA_PIPELINE.md).
diff --git a/docs/METADATA_PIPELINE.md b/docs/METADATA_PIPELINE.md
new file mode 100644
index 0000000..895031c
--- /dev/null
+++ b/docs/METADATA_PIPELINE.md
@@ -0,0 +1,83 @@
+# Metadata Harvesting Pipeline
+
+This repository tracks AI-friendly package metadata under `ai/metadata/`.
+The `metadata_indexer` binary orchestrates validation and harvesting tasks.
+This document explains the workflow and the supporting assets.
+
+## Directory layout
+
+- `ai/metadata/schema.json` – JSON Schema (Draft 2020-12) describing one
+  package record.
+- `ai/metadata/packages/<book>/<package>.json` – harvested package metadata.
+- `ai/metadata/index.json` – generated summary table linking package IDs to
+  their JSON files.
+- `ai/notes.md` – scratchpad for future improvements (e.g., jhalfs integration).
+
+## `metadata_indexer` commands
+
+| Command | Description |
+| ------- | ----------- |
+| `validate` | Loads every package JSON file and validates it against `schema.json`. Reports schema violations and summary extraction errors. |
+| `index` | Re-runs validation and regenerates `index.json`. Use `--compact` to write a single-line JSON payload. |
+| `harvest` | Fetches a book page, scrapes build instructions, and emits a draft metadata record (to stdout with `--dry-run` or into `ai/metadata/packages/`). |
+
+### Harvesting flow
+
+1. **Fetch HTML** – the requested page is downloaded with `reqwest` and parsed
+   using `scraper` selectors.
+2. **Heading metadata** – the `h1.sect1` title provides the chapter/section,
+   canonical package name, version, and optional variant hints (see the
+   sketch after this list).
+3. **Build steps** – `<pre class="userinput">` blocks become ordered `build`
+   phases (`setup`, `configure`, `build`, `test`, `install`).
+4. **Artifact stats** – `div.segmentedlist` entries supply SBU and disk usage.
+5. **Source URLs** – the harvester tries two strategies:
+   - Inline HTML links inside the page (common for BLFS articles).
+   - Fallback to the jhalfs `wget-list` for the selected book (currently MLFS)
+     using `package_management::wget_list::get_wget_list` to find matching
+     `<name>-<version>` entries.
+6. **Checksums** – integration with the book’s `md5sums` mirror is pending;
+   placeholder wiring exists (`src/md5_utils.rs`).
+7. **Status** – unresolved items (missing URLs, anchors, etc.) are recorded in
+   `status.issues` so humans can interrogate or patch the draft before
+   promoting it.
+
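+To make step 2 concrete, here is a standalone sketch of the heading parse. The
+regex matches the one used in `src/bin/metadata_indexer.rs`, while the
+name/version/variant split is simplified relative to the real
+`split_name_variant` helper.
+
+```rust
+use regex::Regex;
+
+fn main() {
+    // Example heading text as rendered on a book page.
+    let heading = "5.2. Binutils-2.45 - Pass 1";
+    // Same pattern the harvester applies to the h1.sect1 text.
+    let re = Regex::new(r"^(?P<section>\d+\.\d+)\.\s+(?P<title>.+)$").unwrap();
+    let caps = re.captures(heading).expect("heading did not match");
+    let section = &caps["section"];   // "5.2"
+    let title = caps["title"].trim(); // "Binutils-2.45 - Pass 1"
+    // Simplified split; the real logic handles more suffix variations.
+    let (package, variant) = title.split_once(" - ").unwrap_or((title, ""));
+    let (name, version) = package.rsplit_once('-').unwrap_or((package, ""));
+    println!("section={section} name={name} version={version} variant={variant}");
+}
+```
+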
+### Known gaps
+
+- **Source links via tables** – some MLFS chapters list download links inside a
+  β€œPackage Information” table. The current implementation relies on the
+  jhalfs `wget-list` fallback instead of parsing that table.
+- **Checksums** – MD5 lookups from jhalfs are planned but not yet wired into
+  the harvest pipeline.
+- **Anchor discovery** – if the heading lacks an explicit `id` attribute, the
+  scraper attempts to locate child anchors or scan the raw HTML. If none are
+  found, a warning is recorded and `status.issues` contains a reminder.
+
+## Using jhalfs manifests
+
+The maintained `wget-list`/`md5sums` files hosted by jhalfs provide canonical
+source URLs and hashes. The helper modules `src/wget_list.rs` and
+`src/md5_utils.rs` download these lists for the multilib LFS book. The
+harvester currently consumes the wget-list as a fallback; integrating the
+`md5sums` file will let us emit `source.checksums` automatically.
+
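+A rough sketch of that fallback matching, assuming the wget-list has already
+been fetched as one URL per line (the helper name here is illustrative, not
+part of the actual `wget_list` module):
+
+```rust
+/// Return wget-list entries whose file name starts with "<name>-<version>".
+/// Illustrative only; the real fallback lives in the harvester binary.
+fn matching_urls<'a>(wget_list: &'a str, name: &str, version: &str) -> Vec<&'a str> {
+    let needle = format!("{}-{}", name.to_lowercase(), version);
+    wget_list
+        .lines()
+        .filter(|url| {
+            url.rsplit('/')
+                .next()
+                .map(|file| file.to_lowercase().starts_with(&needle))
+                .unwrap_or(false)
+        })
+        .collect()
+}
+```
+
+For example, `matching_urls(&list, "binutils", "2.45")` would surface the
+`binutils-2.45.tar.xz` entry from the MLFS wget-list.
+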
+Planned enhancements (see `ai/notes.md` and `ai/bugs.json#metadata-harvest-no-source-urls`):
+
+1. Abstract list fetching so BLFS/GLFS variants can reuse the logic.
+2. Normalise the match criteria for package + version (handling pass stages,
+   suffixes, etc.).
+3. Populate checksum entries alongside URLs.
+
+## Manual review checklist
+
+When a new metadata file is generated:
+
+- `schema_version` should match `schema.json` (currently `v0.1.0`).
+- `package.id` should be unique (format `<book>/<slug>`).
+- `source.urls` must include at least one primary URL; add mirrors/patches as
+  needed.
+- Clear any `status.issues` before promoting the record from `draft`.
+- Run `cargo run --bin metadata_indexer -- --base-dir . index` to regenerate
+  the global index once the draft is finalised.
+
+Refer to `README.md` for usage examples and to `docs/ARCHITECTURE.md` for a
+broader overview of the crate layout.
diff --git a/src/ai/mod.rs b/src/ai/mod.rs
new file mode 100644
index 0000000..fb9baca
--- /dev/null
+++ b/src/ai/mod.rs
@@ -0,0 +1,79 @@
+use std::path::{Path, PathBuf};
+
+use anyhow::Result;
+use serde::Deserialize;
+
+/// Loads assistant persona metadata from `ai/personas.json`.
+pub fn load_personas(base_dir: impl AsRef<Path>) -> Result<Vec<Persona>> {
+    let path = resolve(base_dir, "personas.json");
+    read_json(path)
+}
+
+/// Loads the tracked task board from `ai/tasks.json`.
+pub fn load_tasks(base_dir: impl AsRef<Path>) -> Result<TaskBoard> {
+    let path = resolve(base_dir, "tasks.json");
+    read_json(path)
+}
+
+/// Loads the current bug ledger from `ai/bugs.json`.
+pub fn load_bugs(base_dir: impl AsRef<Path>) -> Result<Vec<Bug>> {
+    let path = resolve(base_dir, "bugs.json");
+    read_json(path)
+}
+
+fn resolve(base_dir: impl AsRef<Path>, file: &str) -> PathBuf {
+    base_dir.as_ref().join("ai").join(file)
+}
+
+fn read_json<T>(path: PathBuf) -> Result<T>
+where
+    T: for<'de> Deserialize<'de>,
+{
+    let data = std::fs::read_to_string(&path)?;
+    Ok(serde_json::from_str(&data)?)
+}
+
+#[derive(Debug, Deserialize)]
+pub struct Persona {
+    pub id: String,
+    pub name: String,
+    pub description: String,
+    #[serde(default)]
+    pub strengths: Vec<String>,
+    #[serde(default)]
+    pub notes: String,
+}
+
+#[derive(Debug, Deserialize)]
+pub struct TaskBoard {
+    pub generated_at: String,
+    pub unfinished: Vec<Task>,
+    pub solved: Vec<Task>,
+}
+
+#[derive(Debug, Deserialize)]
+pub struct Task {
+    pub id: String,
+    pub title: String,
+    pub description: String,
+    #[serde(default)]
+    pub blocked_on: Vec<String>,
+    #[serde(default)]
+    pub owner: Option<String>,
+    #[serde(default)]
+    pub resolution: Option<String>,
+}
+
+#[derive(Debug, Deserialize)]
+pub struct Bug {
+    pub id: String,
+    pub title: String,
+    pub description: String,
+    pub status: String,
+    #[serde(default)]
+    pub owner: Option<String>,
+    #[serde(default)]
+    pub created_at: Option<String>,
+    #[serde(default)]
+    pub labels: Vec<String>,
+}
diff --git a/src/bin/metadata_indexer.rs b/src/bin/metadata_indexer.rs
new file mode 100644
index 0000000..f058903
--- /dev/null
+++ b/src/bin/metadata_indexer.rs
@@ -0,0 +1,1017 @@
+use std::collections::HashSet;
+use std::fs;
+use std::path::{Path, PathBuf};
+
+use anyhow::{Context, Result};
+use chrono::Utc;
+use clap::{Parser, Subcommand};
+use jsonschema::JSONSchema;
+use regex::Regex;
+use reqwest::{blocking::Client, redirect::Policy};
+use scraper::{ElementRef, Html, Selector};
+use serde_json::{Value, json};
+use sha2::{Digest, Sha256};
+use walkdir::WalkDir;
+
+#[derive(Parser)]
+#[command(
+    name = "metadata-indexer",
+    about = "Validate and regenerate AI metadata index"
+)]
+struct Cli {
+    /// Repository root containing the `ai/metadata` directory
+    #[arg(long, default_value = ".")]
+    base_dir: PathBuf,
+
+    #[command(subcommand)]
+    command: Command,
+}
+
+#[derive(Subcommand)]
+enum Command {
+    /// Validate all package metadata against the JSON schema
+    Validate,
+    /// Validate metadata and regenerate ai/metadata/index.json
+    Index {
+        /// Emit compact JSON instead of pretty printing
+        #[arg(long)]
+        compact: bool,
+    },
+    /// Fetch and draft metadata for a specific package page
+    Harvest {
+        /// Book identifier (lfs, mlfs, blfs, glfs)
+        #[arg(long)]
+        book: String,
+        /// Page path (relative to base) or full URL
+        #[arg(long)]
+        page: String,
+        /// Override base URL for the selected book
+        #[arg(long)]
+        base_url: Option<String>,
+        /// Optional explicit output file path
+        #[arg(long)]
+        output: Option<PathBuf>,
+        /// Do not write to disk, just print JSON to stdout
+        #[arg(long)]
+        dry_run: bool,
+    },
+}
+
+fn main() -> Result<()> {
+    let cli = Cli::parse();
+    let base_dir = cli.base_dir.canonicalize().unwrap_or(cli.base_dir);
+    let metadata_dir = base_dir.join("ai").join("metadata");
+    let schema_path = metadata_dir.join("schema.json");
+    let packages_dir = metadata_dir.join("packages");
+
+    let (_schema_value, schema) = load_schema(&schema_path)?;
+    let packages = scan_packages(&packages_dir)?;
+
+    let mut had_errors = false;
+    for package in &packages {
+        let validation = schema.validate(&package.value);
+        if let Err(errors) = validation {
+            had_errors = true;
+            eprintln!(
+                "Schema validation failed for {}:",
+                package.relative_path.display()
+            );
+            for err in errors {
+                eprintln!("  - {}", err);
+            }
+        }
+
+        if let Some(err) = &package.summary_error {
+            had_errors = true;
+            eprintln!(
+                "Summary extraction failed for {}: {}",
+                package.relative_path.display(),
+                err
+            );
+        }
+    }
+
+    match cli.command {
+        Command::Validate => {
+            if had_errors {
+                anyhow::bail!("metadata validation failed");
+            }
+        }
+        Command::Index { compact } => {
+            if had_errors {
+                anyhow::bail!("metadata validation failed; index not updated");
+            }
+
+            let summaries: Vec<_> = packages
+                .iter()
+                .filter_map(|pkg| pkg.summary.clone())
+                .collect();
+
+            let schema_version = summaries
+                .first()
+                .map(|s| s.schema_version.as_str())
+                .unwrap_or("v0.0.0");
+
+            let generated_at = Utc::now().to_rfc3339();
+            let packages_json: Vec<Value> = summaries
+                .iter()
+                .map(|s| {
+                    json!({
+                        "id": s.id.clone(),
+                        "name": s.name.clone(),
+                        "version": s.version.clone(),
+                        "stage": s.stage.clone(),
+                        "book": s.book.clone(),
+                        "variant": s.variant.clone(),
+                        "status": s.status.clone(),
+                        "path": s.relative_path.clone(),
+                    })
+                })
+                .collect();
+
+            let index = json!({
+                "generated_at": generated_at,
+                "schema_version": schema_version,
+                "packages": packages_json,
+            });
+
+            let index_path = metadata_dir.join("index.json");
+            let serialized = if compact {
+                serde_json::to_string(&index)?
+            } else {
+                serde_json::to_string_pretty(&index)?
+            };
+            fs::write(&index_path, serialized)
+                .with_context(|| format!("writing {}", index_path.display()))?;
+            println!("Updated {}", index_path.display());
+        }
+        Command::Harvest {
+            book,
+            page,
+            base_url,
+            output,
+            dry_run,
+        } => {
+            let book_lower = book.to_lowercase();
+            let harvest = harvest_metadata(&metadata_dir, &book_lower, &page, base_url.as_deref())?;
+
+            if dry_run {
+                println!("{}", serde_json::to_string_pretty(&harvest.value)?);
+            } else {
+                let output_path = output.unwrap_or_else(|| {
+                    metadata_dir
+                        .join("packages")
+                        .join(&book_lower)
+                        .join(format!("{}.json", harvest.slug))
+                });
+                if let Some(parent) = output_path.parent() {
+                    fs::create_dir_all(parent)
+                        .with_context(|| format!("creating directory {}", parent.display()))?;
+                }
+                fs::write(&output_path, serde_json::to_string_pretty(&harvest.value)?)
+                    .with_context(|| format!("writing {}", output_path.display()))?;
+                println!(
+                    "Harvested metadata for {} -> {}",
+                    harvest.package_id,
+                    output_path.display()
+                );
+                println!(
+                    "Run `metadata_indexer --base-dir {} index` to refresh the index.",
+                    base_dir.display()
+                );
+            }
+        }
+    }
+
+    Ok(())
+}
+
+fn load_schema(path: &Path) -> Result<(&'static Value, JSONSchema)> {
+    let data = fs::read_to_string(path)
+        .with_context(|| format!("reading schema file {}", path.display()))?;
+    let value: Value = serde_json::from_str(&data)
+        .with_context(|| format!("parsing JSON schema {}", path.display()))?;
+    let leaked = Box::leak(Box::new(value));
+    let schema = JSONSchema::compile(leaked).context("compiling JSON schema")?;
+    Ok((leaked, schema))
+}
+
+fn scan_packages(dir: &Path) -> Result<Vec<PackageRecord>> {
+    if !dir.exists() {
+        return Ok(Vec::new());
+    }
+
+    let mut records = Vec::new();
+    for entry in WalkDir::new(dir)
+        .follow_links(false)
+        .into_iter()
+        .filter_map(|e| e.ok())
+        .filter(|e| e.file_type().is_file())
+        .filter(|e| e.path().extension().and_then(|s| s.to_str()) == Some("json"))
+    {
+        let path = entry.into_path();
+        let data = fs::read_to_string(&path)
+            .with_context(|| format!("reading package metadata {}", path.display()))?;
+        let value: Value = serde_json::from_str(&data)
+            .with_context(|| format!("parsing package JSON {}", path.display()))?;
+
+        let relative_path = path
+            .strip_prefix(dir.parent().unwrap_or(Path::new("")))
+            .unwrap_or(&path)
+            .to_path_buf();
+
+        let (summary, summary_error) = match extract_summary(&value, &relative_path) {
+            Ok(summary) => (Some(summary), None),
+            Err(err) => (None, Some(err)),
+        };
+
+        records.push(PackageRecord {
+            value,
+            relative_path,
+            summary,
+            summary_error,
+        });
+    }
+
+    Ok(records)
+}
+
+#[derive(Clone)]
+struct PackageSummary {
+    schema_version: String,
+    id: String,
+    name: String,
+    version: String,
+    stage: Option<String>,
+    book: String,
+    variant: Option<String>,
+    status: String,
+    relative_path: String,
+}
+
+struct PackageRecord {
+    value: Value,
+    relative_path: PathBuf,
+    summary: Option<PackageSummary>,
+    summary_error: Option<anyhow::Error>,
+}
+
+fn extract_summary(value: &Value, relative_path: &Path) -> Result<PackageSummary> {
+    let schema_version = value
+        .get("schema_version")
+        .and_then(Value::as_str)
+        .context("missing schema_version")?
+        .to_string();
+    let package = value.get("package").context("missing package block")?;
+    let status = value.get("status").context("missing status block")?;
+
+    let id = package
+        .get("id")
+        .and_then(Value::as_str)
+        .context("missing package.id")?
+        .to_string();
+    let name = package
+        .get("name")
+        .and_then(Value::as_str)
+        .context("missing package.name")?
+        .to_string();
+    let version = package
+        .get("version")
+        .and_then(Value::as_str)
+        .context("missing package.version")?
+        .to_string();
+    let book = package
+        .get("book")
+        .and_then(Value::as_str)
+        .context("missing package.book")?
+        .to_string();
+    let stage = package
+        .get("stage")
+        .and_then(Value::as_str)
+        .map(|s| s.to_string());
+    let variant = package
+        .get("variant")
+        .and_then(Value::as_str)
+        .map(|s| s.to_string());
+    let status_state = status
+        .get("state")
+        .and_then(Value::as_str)
+        .context("missing status.state")?
+        .to_string();
+
+    Ok(PackageSummary {
+        schema_version,
+        id,
+        name,
+        version,
+        stage,
+        book,
+        variant,
+        status: status_state,
+        relative_path: relative_path
+            .to_str()
+            .unwrap_or_default()
+            .replace('\\', "/"),
+    })
+}
+
+struct HarvestResult {
+    value: Value,
+    slug: String,
+    package_id: String,
+}
+
+fn harvest_metadata(
+    metadata_dir: &Path,
+    book: &str,
+    page: &str,
+    override_base: Option<&str>,
+) -> Result<HarvestResult> {
+    let page_url = resolve_page_url(book, page, override_base)?;
+    let client = Client::builder()
+        .user_agent("lpkg-metadata-indexer/0.1")
+        .build()?;
+    let response = client
+        .get(&page_url)
+        .send()
+        .with_context(|| format!("fetching {}", page_url))?
+        .error_for_status()
+        .with_context(|| format!("non-success status for {}", page_url))?;
+    let html = response
+        .text()
+        .with_context(|| format!("reading response body from {}", page_url))?;
+
+    let document = Html::parse_document(&html);
+    let harvest = build_metadata_value(metadata_dir, book, &page_url, &document, &html)?;
+    Ok(harvest)
+}
+
+fn resolve_page_url(book: &str, page: &str, override_base: Option<&str>) -> Result<String> {
+    if page.starts_with("http://") || page.starts_with("https://") {
+        return Ok(page.to_string());
+    }
+
+    let base = override_base
+        .map(|s| s.to_string())
+        .or_else(|| default_base_url(book).map(|s| s.to_string()))
+        .context("no base URL available for book")?;
+
+    let base = base.trim_end_matches('/');
+    let mut page_path = page.trim_start_matches('/').to_string();
+    if page_path.is_empty() {
+        page_path = "index.html".to_string();
+    }
+    if !page_path.ends_with(".html") {
+        page_path.push_str(".html");
+    }
+
+    Ok(format!("{}/{}", base, page_path))
+}
+
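+/// Built-in base URLs for the supported books; other books must supply an explicit base URL.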
+fn default_base_url(book: &str) -> Option<&'static str> {
+    match book {
+        "lfs" => Some("https://www.linuxfromscratch.org/lfs/view/12.1"),
+        "mlfs" => Some("https://linuxfromscratch.org/~thomas/multilib-m32"),
+        "blfs" => Some("https://www.linuxfromscratch.org/blfs/view/systemd"),
+        "glfs" => Some("https://www.linuxfromscratch.org/glfs/view/glfs"),
+        _ => None,
+    }
+}
+
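+/// Parse the fetched page into the full metadata document: heading and version, anchors, source URLs, checksums, SBU/disk figures, build steps, and provenance.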
+fn build_metadata_value(
+    metadata_dir: &Path,
+    book: &str,
+    page_url: &str,
+    document: &Html,
+    html: &str,
+) -> Result<HarvestResult> {
+    let heading_selector = Selector::parse("h1.sect1").unwrap();
+    let heading = document
+        .select(&heading_selector)
+        .next()
+        .context("no <h1 class=\"sect1\"> found")?;
+    let heading_text = heading
+        .text()
+        .map(|t| t.replace('\u{00a0}', " "))
+        .collect::<Vec<_>>()
+        .join(" ");
+    let heading_clean = normalize_whitespace(&heading_text);
+    let heading_re = Regex::new(r"^(?P<section>
\d+\.\d+)\.\s+(?P.+)$")?; + let caps = heading_re + .captures(&heading_clean) + .with_context(|| format!("unable to parse heading '{}'", heading_clean))?; + let section = caps["section"].to_string(); + let title = caps["title"].trim().to_string(); + + let (name, version, variant) = split_name_variant(&title); + let chapter_num: u32 = section + .split('.') + .next() + .and_then(|s| s.parse().ok()) + .unwrap_or(0); + let stage = stage_for_chapter(chapter_num).map(|s| s.to_string()); + + let slug_base = slugify(&name); + let slug = variant + .as_ref() + .map(|v| format!("{}-{}", slug_base, slugify(v))) + .unwrap_or_else(|| slug_base.clone()); + let package_id = format!("{}/{}", book, slug); + let package_id_for_json = package_id.clone(); + + let anchor_url = heading + .value() + .id() + .map(|id| format!("{}#{}", page_url, id)) + .or_else(|| locate_child_anchor(&heading).map(|id| format!("{}#{}", page_url, id))) + .or_else(|| { + let anchor_selector = Selector::parse("a[id]").unwrap(); + document + .select(&anchor_selector) + .filter_map(|a| a.value().attr("id")) + .find(|id| id.contains(&slug_base)) + .map(|id| format!("{}#{}", page_url, id)) + }) + .or_else(|| { + let escaped = regex::escape(&slug_base); + let pattern = format!(r#"id=\"([^\"]*{}[^\"]*)\""#, escaped); + Regex::new(&pattern) + .ok() + .and_then(|re| re.captures(html)) + .and_then(|caps| caps.get(1)) + .map(|m| format!("{}#{}", page_url, m.as_str())) + }); + + let mut source_urls = collect_tarball_urls(page_url, document); + let mut archive_name = infer_archive_from_commands(document).or_else(|| { + source_urls.iter().find_map(|entry| { + entry + .url + .path_segments() + .and_then(|mut iter| iter.next_back()) + .map(|s| s.to_string()) + }) + }); + + if source_urls.is_empty() { + match fallback_urls_from_wget(metadata_dir, book, &slug_base, &version) { + Ok(fallback) => { + if !fallback.is_empty() { + eprintln!( + "info: using {} URL(s) from wget-list for {} {}", + fallback.len(), + slug_base, + version + ); + source_urls = fallback; + } + } + Err(err) => { + eprintln!( + "warning: failed to consult wget-list for {} {}: {}", + slug_base, version, err + ); + } + } + } + + if archive_name.is_none() { + archive_name = source_urls.iter().find_map(|entry| { + entry + .url + .path_segments() + .and_then(|mut iter| iter.next_back()) + .map(|s| s.to_string()) + }); + if archive_name.is_none() { + eprintln!( + "warning: unable to infer archive name from source URLs for {} {}", + slug_base, version + ); + } + } + + let (sbu, disk) = extract_artifacts(document); + let build_steps = extract_build_steps(document); + + let mut issues = Vec::new(); + if anchor_url.is_none() { + issues.push("Could not locate anchor id for primary heading".to_string()); + } + if source_urls.is_empty() { + issues.push("No source URLs with archive extensions detected".to_string()); + } + if build_steps.is_empty() { + issues.push("No <pre class=\"userinput\"> blocks found for build commands".to_string()); + } + + let source_urls_json: Vec<Value> = source_urls + .iter() + .map(|entry| { + json!({ + "url": entry.url.as_str(), + "kind": entry.kind, + }) + }) + .collect(); + + let checksum_entries = match resolve_checksums(metadata_dir, book, archive_name.as_deref()) { + Ok(values) => values, + Err(err) => { + eprintln!( + "warning: failed to resolve checksums for {} {}: {}", + slug_base, version, err + ); + Vec::new() + } + }; + + let build_json: Vec<Value> = build_steps + .iter() + .map(|step| { + json!({ + "phase": step.phase, + "commands": step.commands, + "cwd": 
step.cwd, + "requires_root": step.requires_root, + "notes": step.notes, + }) + }) + .collect(); + + let body_selector = Selector::parse("body").unwrap(); + let book_release = document + .select(&body_selector) + .next() + .and_then(|body| body.value().id()) + .map(|id| id.to_string()) + .unwrap_or_default(); + + let retrieved_at = Utc::now().to_rfc3339(); + let content_hash = hex::encode(Sha256::digest(html.as_bytes())); + + let anchors_value = match anchor_url { + Some(ref href) => json!({ "section": href }), + None => json!({}), + }; + + let status_state = "draft"; + + let package_json = json!({ + "schema_version": "v0.1.0", + "package": { + "id": package_id_for_json, + "name": name, + "upstream": Option::<String>::None, + "version": version, + "book": book, + "chapter": chapter_num, + "section": section, + "stage": stage, + "variant": variant, + "anchors": anchors_value, + }, + "source": { + "urls": source_urls_json, + "archive": archive_name, + "checksums": checksum_entries, + }, + "artifacts": { + "sbu": sbu, + "disk": disk, + "install_prefix": Option::<String>::None, + }, + "dependencies": { + "build": Vec::<Value>::new(), + "runtime": Vec::<Value>::new(), + }, + "environment": { + "variables": Vec::<Value>::new(), + "users": Vec::<Value>::new(), + }, + "build": build_json, + "optimizations": { + "enable_lto": true, + "enable_pgo": true, + "cflags": ["-O3", "-flto"], + "ldflags": ["-flto"], + "profdata": Option::<String>::None, + }, + "provenance": { + "book_release": book_release, + "page_url": page_url, + "retrieved_at": retrieved_at, + "content_hash": content_hash, + }, + "status": { + "state": status_state, + "issues": issues, + } + }); + + Ok(HarvestResult { + value: package_json, + slug, + package_id, + }) +} + +fn normalize_whitespace(input: &str) -> String { + let mut result = String::with_capacity(input.len()); + let mut prev_space = false; + for ch in input.chars() { + if ch.is_whitespace() { + if !prev_space { + result.push(' '); + prev_space = true; + } + } else { + prev_space = false; + result.push(ch); + } + } + result.trim().to_string() +} + +fn slugify(input: &str) -> String { + let mut result = String::new(); + let mut prev_dash = false; + for ch in input.chars() { + let normalized = match ch { + 'A'..='Z' => ch.to_ascii_lowercase(), + 'a'..='z' | '0'..='9' => ch, + _ => '-', + }; + if normalized == '-' { + if !prev_dash { + result.push('-'); + prev_dash = true; + } + } else { + prev_dash = false; + result.push(normalized); + } + } + result.trim_matches('-').to_string() +} + +fn split_name_variant(title: &str) -> (String, String, Option<String>) { + let mut base = title.trim().to_string(); + let mut variant = None; + if let Some(idx) = base.rfind(" - ") { + variant = Some(base[idx + 3..].trim().to_string()); + base = base[..idx].trim().to_string(); + } + + let bytes = base.as_bytes(); + for idx in (0..bytes.len()).rev() { + if bytes[idx] == b'-' { + if let Some(next) = bytes.get(idx + 1) { + if next.is_ascii_digit() { + let name = base[..idx].trim(); + let version = base[idx + 1..].trim(); + if !name.is_empty() && !version.is_empty() { + return (name.to_string(), version.to_string(), variant); + } + } + } + } + } + + (base, String::from("unknown"), variant) +} + +fn stage_for_chapter(chapter: u32) -> Option<&'static str> { + match chapter { + 5 => Some("cross-toolchain"), + 6 | 7 => Some("temporary-tools"), + 8 => Some("system"), + 9 => Some("system-configuration"), + 10 => Some("system-finalization"), + _ => None, + } +} + +struct SourceUrlEntry { + url: url::Url, + 
kind: &'static str, +} + +enum ManifestKind { + WgetList, + Md5Sums, +} + +impl ManifestKind { + fn filename(&self) -> &'static str { + match self { + ManifestKind::WgetList => "wget-list.txt", + ManifestKind::Md5Sums => "md5sums.txt", + } + } +} + +fn collect_tarball_urls(page_url: &str, document: &Html) -> Vec<SourceUrlEntry> { + let base = url::Url::parse(page_url).ok(); + let link_selector = Selector::parse("a").unwrap(); + let mut seen = HashSet::new(); + let mut results = Vec::new(); + + for link in document.select(&link_selector) { + if let Some(href) = link.value().attr("href") { + if let Some(kind) = classify_artifact_url(href) { + let resolved = match (&base, url::Url::parse(href)) { + (_, Ok(url)) => url, + (Some(base_url), Err(_)) => match base_url.join(href) { + Ok(url) => url, + Err(_) => continue, + }, + _ => continue, + }; + if seen.insert(resolved.clone()) { + results.push(SourceUrlEntry { + url: resolved, + kind, + }); + } + } + } + } + + results +} + +fn classify_artifact_url(href: &str) -> Option<&'static str> { + let lower = href.to_lowercase(); + if lower.ends_with(".tar") + || lower.ends_with(".tar.gz") + || lower.ends_with(".tar.bz2") + || lower.ends_with(".tar.xz") + || lower.ends_with(".tgz") + || lower.ends_with(".zip") + { + Some("primary") + } else if lower.ends_with(".patch") { + Some("patch") + } else if lower.ends_with(".sig") || lower.ends_with(".asc") { + Some("signature") + } else { + None + } +} + +fn fallback_urls_from_wget( + metadata_dir: &Path, + book: &str, + slug: &str, + version: &str, +) -> Result<Vec<SourceUrlEntry>> { + let manifest = load_jhalfs_manifest(metadata_dir, book, ManifestKind::WgetList)?; + let needle = format!("{}-{}", slug.replace('_', "-"), version); + eprintln!("debug: searching wget-list for '{}'", needle); + let mut entries = Vec::new(); + for line in manifest.lines() { + if line.contains(&needle) { + if let Ok(url) = url::Url::parse(line.trim()) { + eprintln!("info: matched wget URL {}", url); + entries.push(SourceUrlEntry { + url, + kind: "primary", + }); + } else { + eprintln!( + "warning: unable to parse URL from wget-list line: {}", + line.trim() + ); + } + } + } + if entries.is_empty() { + eprintln!("warning: no wget-list entries matched '{}'", needle); + } + Ok(entries) +} + +fn resolve_checksums( + metadata_dir: &Path, + book: &str, + archive_name: Option<&str>, +) -> Result<Vec<Value>> { + let mut checksums = Vec::new(); + let Some(archive) = archive_name else { + return Ok(checksums); + }; + + let manifest = load_jhalfs_manifest(metadata_dir, book, ManifestKind::Md5Sums)?; + for line in manifest.lines() { + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + let mut parts = trimmed.split_whitespace(); + let Some(hash) = parts.next() else { continue }; + let Some(file) = parts.next() else { continue }; + if file == archive { + checksums.push(json!({ + "alg": "md5", + "value": hash.to_lowercase(), + })); + break; + } + } + + Ok(checksums) +} + +fn load_jhalfs_manifest(metadata_dir: &Path, book: &str, kind: ManifestKind) -> Result<String> { + let cache_dir = metadata_dir.join("cache"); + fs::create_dir_all(&cache_dir) + .with_context(|| format!("creating cache directory {}", cache_dir.display()))?; + + let cache_path = cache_dir.join(format!("{}-{}", book, kind.filename())); + if cache_path.exists() { + return fs::read_to_string(&cache_path) + .with_context(|| format!("reading cached manifest {}", cache_path.display())); + } + + let url = manifest_url(book, &kind) + .with_context(|| format!("no 
manifest URL configured for book '{}'", book))?; + + let client = Client::builder().redirect(Policy::limited(5)).build()?; + let body = client + .get(url) + .send() + .with_context(|| format!("fetching {}", url))? + .error_for_status() + .with_context(|| format!("request failed for {}", url))? + .text() + .with_context(|| format!("reading response body from {}", url))?; + + fs::write(&cache_path, &body) + .with_context(|| format!("caching manifest {}", cache_path.display()))?; + + Ok(body) +} + +fn manifest_url(book: &str, kind: &ManifestKind) -> Option<&'static str> { + match (book, kind) { + ("mlfs", ManifestKind::WgetList) => { + Some("https://www.linuxfromscratch.org/~thomas/multilib-m32/wget-list-sysv") + } + ("mlfs", ManifestKind::Md5Sums) => { + Some("https://www.linuxfromscratch.org/~thomas/multilib-m32/md5sums") + } + ("lfs", ManifestKind::WgetList) => { + Some("https://www.linuxfromscratch.org/lfs/view/12.1/wget-list") + } + ("lfs", ManifestKind::Md5Sums) => { + Some("https://www.linuxfromscratch.org/lfs/view/12.1/md5sums") + } + ("blfs", ManifestKind::WgetList) => { + Some("https://anduin.linuxfromscratch.org/BLFS/view/systemd/wget-list") + } + ("blfs", ManifestKind::Md5Sums) => { + Some("https://anduin.linuxfromscratch.org/BLFS/view/systemd/md5sums") + } + ("glfs", ManifestKind::WgetList) => { + Some("https://www.linuxfromscratch.org/glfs/view/glfs/wget-list") + } + ("glfs", ManifestKind::Md5Sums) => { + Some("https://www.linuxfromscratch.org/glfs/view/glfs/md5sums") + } + _ => None, + } +} + +fn locate_child_anchor(heading: &ElementRef) -> Option<String> { + let mut current = heading.first_child(); + while let Some(node) = current { + if let Some(element) = ElementRef::wrap(node) { + if let Some(id) = element + .value() + .attr("id") + .or_else(|| element.value().attr("name")) + { + return Some(id.to_string()); + } + } + current = node.next_sibling(); + } + None +} + +fn infer_archive_from_commands(document: &Html) -> Option<String> { + let pre_selector = Selector::parse("pre.userinput").unwrap(); + for pre in document.select(&pre_selector) { + let text = pre.text().collect::<Vec<_>>().join("\n"); + for line in text.lines() { + if let Some(start) = line.find("tar -xf") { + let args = line[start + 7..].trim(); + let parts: Vec<&str> = args.split_whitespace().collect(); + if let Some(archive) = parts.get(0) { + let cleaned = archive.trim_matches(['"', '\'', ','].as_ref()); + if cleaned.ends_with(".tar") + || cleaned.contains(".tar.") + || cleaned.ends_with(".tgz") + || cleaned.ends_with(".zip") + { + return Some(cleaned.trim_start_matches("../").to_string()); + } + } + } + } + } + None +} + +fn extract_artifacts(document: &Html) -> (Option<f64>, Option<i64>) { + let seg_selector = Selector::parse("div.segmentedlist div.seg").unwrap(); + let title_selector = Selector::parse("strong.segtitle").unwrap(); + let body_selector = Selector::parse("span.segbody").unwrap(); + let mut sbu = None; + let mut disk = None; + + for seg in document.select(&seg_selector) { + let title = seg + .select(&title_selector) + .next() + .map(|n| normalize_whitespace(&n.text().collect::<Vec<_>>().join(""))); + let body = seg + .select(&body_selector) + .next() + .map(|n| normalize_whitespace(&n.text().collect::<Vec<_>>().join(""))); + + if let (Some(title), Some(body)) = (title, body) { + if title.contains("Approximate build time") { + if let Some(value) = parse_numeric(&body) { + sbu = Some(value); + } + } else if title.contains("Required disk space") { + if let Some(value) = parse_numeric(&body) { + 
disk = Some(value as i64); + } + } + } + } + + (sbu, disk) +} + +fn parse_numeric(input: &str) -> Option<f64> { + let re = Regex::new(r"([0-9]+(?:\\.[0-9]+)?)").ok()?; + re.captures(input) + .and_then(|caps| caps.get(1)) + .and_then(|m| m.as_str().parse().ok()) +} + +struct BuildStep { + phase: &'static str, + commands: Vec<String>, + cwd: Option<String>, + requires_root: bool, + notes: Option<String>, +} + +fn extract_build_steps(document: &Html) -> Vec<BuildStep> { + let pre_selector = Selector::parse("pre.userinput").unwrap(); + let mut steps = Vec::new(); + + for pre in document.select(&pre_selector) { + let code = pre.text().collect::<Vec<_>>().join("\n"); + let commands: Vec<String> = code + .lines() + .map(|line| line.trim().to_string()) + .filter(|line| !line.is_empty()) + .collect(); + + if commands.is_empty() { + continue; + } + + let phase = classify_phase(&commands); + steps.push(BuildStep { + phase, + commands, + cwd: None, + requires_root: false, + notes: None, + }); + } + + steps +} + +fn classify_phase(commands: &[String]) -> &'static str { + let joined = commands.join("\n").to_lowercase(); + if joined.contains("make install") { + "install" + } else if joined.contains("make -k check") || joined.contains("make check") { + "test" + } else if joined.contains("configure") { + "configure" + } else if joined.contains("tar -xf") || joined.contains("mkdir ") { + "setup" + } else { + "build" + } +} diff --git a/src/db/mod.rs b/src/db/mod.rs new file mode 100644 index 0000000..9cc40b9 --- /dev/null +++ b/src/db/mod.rs @@ -0,0 +1,107 @@ +pub mod models; +pub mod schema; + +use std::env; + +use anyhow::{Context, Result}; +use diesel::prelude::*; +use diesel::r2d2::{self, ConnectionManager}; +use diesel::sqlite::SqliteConnection; + +use crate::pkgs::package::PackageDefinition; + +use self::models::{NewPackage, Package}; +use self::schema::packages::dsl as packages_dsl; + +pub type Pool = r2d2::Pool<ConnectionManager<SqliteConnection>>; +pub type Connection = r2d2::PooledConnection<ConnectionManager<SqliteConnection>>; + +const DEFAULT_DB_URL: &str = "lpkg.db"; + +/// Resolve the database URL from `LPKG_DATABASE_URL` or fall back to `lpkg.db` in the CWD. +pub fn database_url() -> String { + env::var("LPKG_DATABASE_URL").unwrap_or_else(|_| DEFAULT_DB_URL.to_string()) +} + +/// Build an r2d2 connection pool and ensure the schema exists. +pub fn establish_pool() -> Result<Pool> { + let manager = ConnectionManager::<SqliteConnection>::new(database_url()); + let pool = Pool::builder() + .build(manager) + .context("creating Diesel connection pool")?; + + { + let mut conn = pool + .get() + .context("establishing initial database connection")?; + initialize(&mut conn)?; + } + + Ok(pool) +} + +fn initialize(conn: &mut SqliteConnection) -> Result<()> { + diesel::sql_query( + "CREATE TABLE IF NOT EXISTS packages ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + version TEXT NOT NULL, + source TEXT, + md5 TEXT, + configure_args TEXT, + build_commands TEXT, + install_commands TEXT, + dependencies TEXT, + enable_lto BOOLEAN NOT NULL DEFAULT 1, + enable_pgo BOOLEAN NOT NULL DEFAULT 1, + cflags TEXT, + ldflags TEXT, + profdata TEXT + )", + ) + .execute(conn) + .context("creating packages table")?; + + diesel::sql_query( + "CREATE UNIQUE INDEX IF NOT EXISTS idx_packages_name_version ON packages(name, version)", + ) + .execute(conn) + .context("creating packages unique index")?; + + Ok(()) +} + +/// Insert or update a package definition in the database. 
+pub fn upsert_package(conn: &mut SqliteConnection, definition: &PackageDefinition) -> Result<()> { + let record = NewPackage::try_from(definition)?; + + diesel::insert_into(packages_dsl::packages) + .values(&record) + .on_conflict((packages_dsl::name, packages_dsl::version)) + .do_update() + .set(&record) + .execute(conn) + .context("upserting package record")?; + + Ok(()) +} + +/// Convenience helper to upsert via pool and return the persisted definition. +pub fn upsert_package_via_pool(pool: &Pool, definition: &PackageDefinition) -> Result<()> { + let mut conn = pool.get().context("acquiring database connection")?; + upsert_package(&mut conn, definition) +} + +/// Load all packages from the database. +pub fn load_packages(conn: &mut SqliteConnection) -> Result<Vec<Package>> { + packages_dsl::packages + .order((packages_dsl::name, packages_dsl::version)) + .load::<Package>(conn) + .context("loading packages from database") +} + +/// Load packages using the shared pool. +pub fn load_packages_via_pool(pool: &Pool) -> Result<Vec<Package>> { + let mut conn = pool.get().context("acquiring database connection")?; + load_packages(&mut conn) +} diff --git a/src/db/models.rs b/src/db/models.rs new file mode 100644 index 0000000..cf48092 --- /dev/null +++ b/src/db/models.rs @@ -0,0 +1,104 @@ +use anyhow::{Context, Result}; +use diesel::prelude::*; +use serde::{Deserialize, Serialize}; + +use crate::pkgs::package::PackageDefinition; + +use super::schema::packages; + +#[derive(Debug, Queryable, Serialize, Deserialize)] +pub struct Package { + pub id: i32, + pub name: String, + pub version: String, + pub source: Option<String>, + pub md5: Option<String>, + pub configure_args: Option<String>, + pub build_commands: Option<String>, + pub install_commands: Option<String>, + pub dependencies: Option<String>, + pub enable_lto: bool, + pub enable_pgo: bool, + pub cflags: Option<String>, + pub ldflags: Option<String>, + pub profdata: Option<String>, +} + +impl Package { + pub fn into_definition(self) -> Result<PackageDefinition> { + Ok(PackageDefinition { + name: self.name, + version: self.version, + source: self.source, + md5: self.md5, + configure_args: parse_vec(self.configure_args)?, + build_commands: parse_vec(self.build_commands)?, + install_commands: parse_vec(self.install_commands)?, + dependencies: parse_vec(self.dependencies)?, + optimizations: crate::pkgs::package::OptimizationSettings { + enable_lto: self.enable_lto, + enable_pgo: self.enable_pgo, + cflags: parse_vec(self.cflags)?, + ldflags: parse_vec(self.ldflags)?, + profdata: self.profdata, + }, + }) + } +} + +#[derive(Debug, Insertable, AsChangeset)] +#[diesel(table_name = packages)] +pub struct NewPackage { + pub name: String, + pub version: String, + pub source: Option<String>, + pub md5: Option<String>, + pub configure_args: Option<String>, + pub build_commands: Option<String>, + pub install_commands: Option<String>, + pub dependencies: Option<String>, + pub enable_lto: bool, + pub enable_pgo: bool, + pub cflags: Option<String>, + pub ldflags: Option<String>, + pub profdata: Option<String>, +} + +impl TryFrom<&PackageDefinition> for NewPackage { + type Error = anyhow::Error; + + fn try_from(value: &PackageDefinition) -> Result<Self> { + Ok(Self { + name: value.name.clone(), + version: value.version.clone(), + source: value.source.clone(), + md5: value.md5.clone(), + configure_args: serialize_vec(&value.configure_args)?, + build_commands: serialize_vec(&value.build_commands)?, + install_commands: serialize_vec(&value.install_commands)?, + 
dependencies: serialize_vec(&value.dependencies)?, + enable_lto: value.optimizations.enable_lto, + enable_pgo: value.optimizations.enable_pgo, + cflags: serialize_vec(&value.optimizations.cflags)?, + ldflags: serialize_vec(&value.optimizations.ldflags)?, + profdata: value.optimizations.profdata.clone(), + }) + } +} + +fn serialize_vec(values: &[String]) -> Result<Option<String>> { + if values.is_empty() { + Ok(None) + } else { + serde_json::to_string(values) + .map(Some) + .context("serializing vector to JSON") + } +} + +fn parse_vec(raw: Option<String>) -> Result<Vec<String>> { + match raw { + Some(data) => serde_json::from_str(&data).context("parsing JSON vector"), + None => Ok(Vec::new()), + } +} diff --git a/src/db/schema.rs b/src/db/schema.rs new file mode 100644 index 0000000..9ecfa1e --- /dev/null +++ b/src/db/schema.rs @@ -0,0 +1,19 @@ +// Diesel schema for package storage. Maintained manually to avoid build-script dependency. +diesel::table! { + packages (id) { + id -> Integer, + name -> Text, + version -> Text, + source -> Nullable<Text>, + md5 -> Nullable<Text>, + configure_args -> Nullable<Text>, + build_commands -> Nullable<Text>, + install_commands -> Nullable<Text>, + dependencies -> Nullable<Text>, + enable_lto -> Bool, + enable_pgo -> Bool, + cflags -> Nullable<Text>, + ldflags -> Nullable<Text>, + profdata -> Nullable<Text>, + } +} diff --git a/src/ingest/blfs.rs b/src/ingest/blfs.rs new file mode 100644 index 0000000..850b46d --- /dev/null +++ b/src/ingest/blfs.rs @@ -0,0 +1,113 @@ +use anyhow::{Context, Result}; +use regex::Regex; +use reqwest::blocking::Client; +use scraper::{Html, Selector}; + +use super::{BookPackage, FetchOptions}; +use crate::ingest::lfs::split_name_version; + +pub fn fetch_book(options: &FetchOptions) -> Result<Vec<BookPackage>> { + let base = options.base_url.trim_end_matches('/'); + let url = format!("{base}/book.html"); + + let client = Client::builder().build().context("building HTTP client")?; + let body = client + .get(&url) + .send() + .with_context(|| format!("fetching {}", url))? + .error_for_status() + .with_context(|| format!("request failed for {}", url))? + .text() + .context("reading response body")?; + + parse_book_html(options, &url, &body) +} + +pub fn parse_book_html( + options: &FetchOptions, + book_url: &str, + body: &str, +) -> Result<Vec<BookPackage>> { + let document = Html::parse_document(body); + let selector = Selector::parse("h1.sect1").unwrap(); + let numbering_re = + Regex::new(r"^(?P<chapter>\d+)\.(?P<section>\d+)\.\s+(?P<title>.+)$").unwrap(); + + let mut results = Vec::new(); + + for heading in document.select(&selector) { + let text = heading + .text() + .map(str::trim) + .filter(|s| !s.is_empty()) + .collect::<Vec<_>>() + .join(" ") + .replace('\n', " ") + .trim() + .to_string(); + if text.is_empty() { + continue; + } + + // BLFS headings often look like "33.2. Bzip2" or "33.2. 
Bzip2-1.0.8" + let caps = match numbering_re.captures(&text) { + Some(caps) => caps, + None => continue, + }; + + let chapter_num: u32 = caps["chapter"].parse().unwrap_or(0); + let section_num: u32 = caps["section"].parse().unwrap_or(0); + let title = caps["title"].trim(); + + let (name, version, variant) = match split_name_version(title) { + Some(parts) => parts, + None => continue, + }; + + let href = heading.value().id().map(|id| { + let mut base = book_url.to_string(); + if !base.contains('#') { + base.push('#'); + } + format!("{}{}", base, id) + }); + + let section_label = Some(format!("{}.{}", chapter_num, section_num)); + + results.push(BookPackage { + book: options.book, + chapter: Some(chapter_num), + section: section_label, + name, + version: Some(version), + href, + md5: None, + stage: None, + variant, + notes: None, + }); + } + + Ok(results) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ingest::BookKind; + + #[test] + fn parse_blfs_sample() { + let html = r#" + <html><body> + <h1 class=\"sect1\" id=\"ch33-bzip2\">33.2. Bzip2-1.0.8</h1> + <h1 class=\"sect1\" id=\"ch33-about\">33.1. Introduction</h1> + </body></html> + "#; + let opts = FetchOptions::new("https://example.invalid/blfs", BookKind::Blfs); + let items = parse_book_html(&opts, "https://example.invalid/blfs/book.html", html).unwrap(); + assert_eq!(items.len(), 1); + assert_eq!(items[0].name, "Bzip2"); + assert_eq!(items[0].version.as_deref(), Some("1.0.8")); + } +} diff --git a/src/ingest/glfs.rs b/src/ingest/glfs.rs new file mode 100644 index 0000000..3fb7dff --- /dev/null +++ b/src/ingest/glfs.rs @@ -0,0 +1,109 @@ +use anyhow::{Context, Result}; +use regex::Regex; +use reqwest::blocking::Client; +use scraper::{Html, Selector}; + +use super::{BookPackage, FetchOptions}; +use crate::ingest::lfs::split_name_version; + +pub fn fetch_book(options: &FetchOptions) -> Result<Vec<BookPackage>> { + let base = options.base_url.trim_end_matches('/'); + let url = format!("{base}/book.html"); + + let client = Client::builder().build().context("building HTTP client")?; + let body = client + .get(&url) + .send() + .with_context(|| format!("fetching {}", url))? + .error_for_status() + .with_context(|| format!("request failed for {}", url))? 
+ .text() + .context("reading response body")?; + + parse_book_html(options, &url, &body) +} + +pub fn parse_book_html( + options: &FetchOptions, + book_url: &str, + body: &str, +) -> Result<Vec<BookPackage>> { + let document = Html::parse_document(body); + let selector = Selector::parse("h1.sect1").unwrap(); + let numbering_re = + Regex::new(r"^(?P<chapter>\d+)\.(?P<section>\d+)\.\s+(?P<title>.+)$").unwrap(); + + let mut results = Vec::new(); + + for heading in document.select(&selector) { + let text = heading + .text() + .map(str::trim) + .filter(|s| !s.is_empty()) + .collect::<Vec<_>>() + .join(" ") + .replace('\n', " ") + .trim() + .to_string(); + if text.is_empty() { + continue; + } + + let caps = match numbering_re.captures(&text) { + Some(caps) => caps, + None => continue, + }; + + let chapter_num: u32 = caps["chapter"].parse().unwrap_or(0); + let section_num: u32 = caps["section"].parse().unwrap_or(0); + let title = caps["title"].trim(); + + let (name, version, variant) = match split_name_version(title) { + Some(parts) => parts, + None => continue, + }; + + let href = heading.value().id().map(|id| { + let mut base = book_url.to_string(); + if !base.contains('#') { + base.push('#'); + } + format!("{}{}", base, id) + }); + + results.push(BookPackage { + book: options.book, + chapter: Some(chapter_num), + section: Some(format!("{}.{}", chapter_num, section_num)), + name, + version: Some(version), + href, + md5: None, + stage: None, + variant, + notes: None, + }); + } + + Ok(results) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ingest::BookKind; + + #[test] + fn parse_glfs_sample() { + let html = r#" + <html><body> + <h1 class=\"sect1\" id=\"ch12-coreutils\">12.4. Coreutils-9.8</h1> + </body></html> + "#; + let opts = FetchOptions::new("https://example.invalid/glfs", BookKind::Glfs); + let items = parse_book_html(&opts, "https://example.invalid/glfs/book.html", html).unwrap(); + assert_eq!(items.len(), 1); + assert_eq!(items[0].name, "Coreutils"); + assert_eq!(items[0].version.as_deref(), Some("9.8")); + } +} diff --git a/src/ingest/lfs.rs b/src/ingest/lfs.rs new file mode 100644 index 0000000..a9d2f37 --- /dev/null +++ b/src/ingest/lfs.rs @@ -0,0 +1,169 @@ +use anyhow::{Context, Result}; +use regex::Regex; +use reqwest::blocking::Client; +use scraper::{Html, Selector}; + +use super::{BookPackage, FetchOptions}; + +pub fn fetch_book(options: &FetchOptions) -> Result<Vec<BookPackage>> { + let base = options.base_url.trim_end_matches('/'); + let url = format!("{base}/book.html"); + + let client = Client::builder().build().context("building HTTP client")?; + let body = client + .get(&url) + .send() + .with_context(|| format!("fetching {}", url))? + .error_for_status() + .with_context(|| format!("request failed for {}", url))? 
+ .text() + .context("reading response body")?; + + parse_book_html(options, &url, &body) +} + +pub fn parse_book_html( + options: &FetchOptions, + book_url: &str, + body: &str, +) -> Result<Vec<BookPackage>> { + let document = Html::parse_document(body); + let selector = Selector::parse("h1.sect1").unwrap(); + + let numbering_re = + Regex::new(r"^(?P<chapter>\d+)\.(?P<section>\d+)\.\s+(?P<title>.+)$").unwrap(); + + let mut results = Vec::new(); + + for heading in document.select(&selector) { + let text = heading + .text() + .map(str::trim) + .filter(|s| !s.is_empty()) + .collect::<Vec<_>>() + .join(" ") + .replace('\n', " ") + .trim() + .to_string(); + if text.is_empty() { + continue; + } + + let caps = match numbering_re.captures(&text) { + Some(caps) => caps, + None => continue, + }; + let chapter_num: u32 = caps["chapter"].parse().unwrap_or(0); + let section_num: u32 = caps["section"].parse().unwrap_or(0); + let title = caps["title"].trim(); + + let (name, version, variant) = match split_name_version(title) { + Some(parts) => parts, + None => continue, + }; + + let stage = stage_for_chapter(chapter_num).map(|s| s.to_string()); + let identifier = format!("{chapter_num}.{section_num:02}"); + + let href = heading.value().id().map(|id| { + let mut base = book_url.to_string(); + if !base.contains('#') { + base.push('#'); + } + format!("{}{}", base, id) + }); + + results.push(BookPackage { + book: options.book, + chapter: Some(chapter_num), + section: Some(identifier), + name, + version: Some(version), + href, + md5: None, + stage, + variant, + notes: None, + }); + } + + Ok(results) +} + +pub(crate) fn split_name_version(title: &str) -> Option<(String, String, Option<String>)> { + // Find the last '-' whose next character is a digit (start of version) + let bytes = title.as_bytes(); + for idx in (0..bytes.len()).rev() { + if bytes[idx] == b'-' { + if let Some(next) = bytes.get(idx + 1) { + if next.is_ascii_digit() { + let name = title[..idx].trim(); + let mut remainder = title[idx + 1..].trim(); + if name.is_empty() || remainder.is_empty() { + return None; + } + + let mut variant = None; + if let Some(pos) = remainder.find(" - ") { + variant = Some(remainder[pos + 3..].trim().to_string()); + remainder = remainder[..pos].trim(); + } else if let Some(pos) = remainder.find(" (") { + let note = remainder[pos + 1..].trim_end_matches(')').trim(); + variant = Some(note.to_string()); + remainder = remainder[..pos].trim(); + } + + return Some((name.to_string(), remainder.to_string(), variant)); + } + } + } + } + None +} + +fn stage_for_chapter(chapter: u32) -> Option<&'static str> { + match chapter { + 5 => Some("cross-toolchain"), + 6 | 7 => Some("temporary-tools"), + 8 => Some("system"), + 9 => Some("system-configuration"), + 10 => Some("system-finalization"), + _ => None, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ingest::BookKind; + use scraper::{Html, Selector}; + + #[test] + fn parse_sample_headings() { + let html = r#" + <html><body> + <h1 class=\"sect1\" id=\"ch05-binutils-pass1\">5.5. Binutils-2.45 - Pass 1</h1> + <h1 class=\"sect1\" id=\"ch05-gcc-pass1\">5.6. GCC-15.2.0 - Pass 1</h1> + <h1 class=\"sect1\" id=\"ch09-bootscripts\">9.3. LFS-Bootscripts-20250827</h1> + <h1 class=\"sect1\" id=\"ch08-xml-parser\">8.41. 
XML::Parser-2.47</h1> + </body></html> + "#; + let opts = FetchOptions::new("https://example.invalid/lfs", BookKind::Mlfs); + let document = Html::parse_document(html); + let selector = Selector::parse("h1.sect1").unwrap(); + assert!( + document.select(&selector).next().is_some(), + "sample headings selector returned no nodes" + ); + let packages = + parse_book_html(&opts, "https://example.invalid/lfs/book.html", html).unwrap(); + assert_eq!(packages.len(), 4); + assert_eq!(packages[0].name, "Binutils"); + assert_eq!(packages[0].version.as_deref(), Some("2.45")); + assert_eq!(packages[0].variant.as_deref(), Some("Pass 1")); + assert_eq!(packages[0].stage.as_deref(), Some("cross-toolchain")); + assert_eq!(packages[1].variant.as_deref(), Some("Pass 1")); + assert_eq!(packages[2].variant, None); + assert_eq!(packages[3].name, "XML::Parser"); + } +} diff --git a/src/ingest/mod.rs b/src/ingest/mod.rs new file mode 100644 index 0000000..cb88b9e --- /dev/null +++ b/src/ingest/mod.rs @@ -0,0 +1,67 @@ +pub mod blfs; +pub mod glfs; +pub mod lfs; + +use std::fmt; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum BookKind { + Lfs, + Mlfs, + Blfs, + Glfs, +} + +impl fmt::Display for BookKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let label = match self { + BookKind::Lfs => "lfs", + BookKind::Mlfs => "mlfs", + BookKind::Blfs => "blfs", + BookKind::Glfs => "glfs", + }; + f.write_str(label) + } +} + +#[derive(Debug, Clone)] +pub struct BookPackage { + pub book: BookKind, + pub chapter: Option<u32>, + pub section: Option<String>, + pub name: String, + pub version: Option<String>, + pub href: Option<String>, + pub md5: Option<String>, + pub stage: Option<String>, + pub variant: Option<String>, + pub notes: Option<String>, +} + +impl BookPackage { + pub fn identifier(&self) -> String { + match &self.variant { + Some(variant) if !variant.is_empty() => { + format!( + "{}-{}-{}", + self.book, + self.name, + variant.replace(' ', "-").to_lowercase() + ) + } + _ => format!("{}-{}", self.book, self.name), + } + } +} + +#[derive(Debug, Clone)] +pub struct FetchOptions<'a> { + pub base_url: &'a str, + pub book: BookKind, +} + +impl<'a> FetchOptions<'a> { + pub fn new(base_url: &'a str, book: BookKind) -> Self { + Self { base_url, book } + } +} diff --git a/src/lib.rs b/src/lib.rs index 04159f2..e28f156 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,2 +1,12 @@ +pub mod ai; +pub mod db; +pub mod html; +pub mod ingest; +pub mod md5_utils; +pub mod mirrors; pub mod pkgs; +pub mod version_check; +pub mod wget_list; + +#[cfg(feature = "tui")] pub mod tui; diff --git a/src/main.rs b/src/main.rs index 79dd5fe..44a7b0a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,427 @@ -mod tui; +use std::{collections::BTreeSet, env, fs, path::PathBuf}; + +use anyhow::{Context, Result, anyhow}; +use clap::{CommandFactory, Parser, Subcommand}; + +use package_management::{ + db, html, md5_utils, + pkgs::{ + by_name::bi::binutils::cross_toolchain::build_binutils_from_page, + mlfs, + scaffolder::{self, ScaffoldRequest}, + }, + version_check, wget_list, +}; + +#[cfg(feature = "tui")] +use package_management::tui::disk_manager::DiskManager; + +#[derive(Parser)] +#[command(name = "lpkg", version, about = "LPKG – Lightweight Package Manager", long_about = None)] +struct Cli { + /// Command to run. Defaults to launching the TUI (when available). + #[command(subcommand)] + command: Option<Command>, +} + +#[derive(Subcommand)] +enum Command { + /// Run one of the automated workflows. 
+ Workflow { + #[command(subcommand)] + workflow: WorkflowCommand, + }, + /// Launch interactive terminal UIs. + #[cfg(feature = "tui")] + #[command(subcommand)] + Tui(TuiCommand), +} + +#[derive(Subcommand)] +enum WorkflowCommand { + /// Fetch <pre> blocks from the given URL and run version checks found inside them. + EnvCheck { + /// URL of the Linux From Scratch page containing ver_check/ver_kernel snippets. + url: String, + }, + /// Download the LFS wget-list and md5sums, optionally writing them to disk. + FetchManifests { + /// Output directory to store wget-list and md5sums files. Uses current dir if omitted. + #[arg(long)] + output: Option<PathBuf>, + }, + /// Parse the Binutils Pass 1 page and build it using the extracted steps. + BuildBinutils { + /// URL of the Binutils Pass 1 instructions to parse. + url: String, + /// Root directory of the LFS workspace (used for $LFS paths). + #[arg(long = "lfs-root")] + lfs_root: PathBuf, + /// Optional explicit cross-compilation target (defaults to $LFS_TGT env or x86_64-lfs-linux-gnu). + #[arg(long)] + target: Option<String>, + }, + /// Scaffold a new package module under `src/pkgs/by_name` with tuned optimizations. + ScaffoldPackage { + /// Logical package name (used for module layout and metadata). + #[arg(long)] + name: String, + /// Upstream version string. + #[arg(long)] + version: String, + /// Optional source archive URL. + #[arg(long)] + source: Option<String>, + /// Optional MD5 checksum of the source archive. + #[arg(long)] + md5: Option<String>, + /// Additional configure arguments (repeat flag). + #[arg(long = "configure-arg", value_name = "ARG")] + configure_arg: Vec<String>, + /// Build commands (repeat flag). + #[arg(long = "build-cmd", value_name = "CMD")] + build_cmd: Vec<String>, + /// Install commands (repeat flag). + #[arg(long = "install-cmd", value_name = "CMD")] + install_cmd: Vec<String>, + /// Declared dependencies (repeat flag). + #[arg(long = "dependency", value_name = "PKG")] + dependency: Vec<String>, + /// Whether to enable LTO (defaults to true). + #[arg(long = "enable-lto", default_value_t = true)] + enable_lto: bool, + /// Whether to enable PGO instrumentation/use (defaults to true). + #[arg(long = "enable-pgo", default_value_t = true)] + enable_pgo: bool, + /// Additional CFLAGS (repeat flag). + #[arg(long = "cflag", value_name = "FLAG")] + cflag: Vec<String>, + /// Additional LDFLAGS (repeat flag). + #[arg(long = "ldflag", value_name = "FLAG")] + ldflag: Vec<String>, + /// Optional profile data file name for PGO replay (enables -fprofile-use). + #[arg(long)] + profdata: Option<String>, + /// Base directory for module generation (defaults to src/pkgs/by_name). + #[arg(long, default_value = "src/pkgs/by_name")] + base: PathBuf, + }, + /// Import all packages from the MLFS catalogue, scaffolding modules and persisting metadata. + ImportMlfs { + /// Perform a dry run without writing files or touching the database. + #[arg(long, default_value_t = false)] + dry_run: bool, + /// Only process the first N records (after deduplication). + #[arg(long)] + limit: Option<usize>, + /// Base directory for module generation (defaults to src/pkgs/by_name). + #[arg(long, default_value = "src/pkgs/by_name")] + base: PathBuf, + /// Overwrite existing modules by deleting and regenerating them. + #[arg(long, default_value_t = false)] + overwrite: bool, + /// Source URL for the MLFS book (defaults to the canonical mirror). 
+ #[arg(long = "source-url")] + source_url: Option<String>, + }, +} + +#[cfg(feature = "tui")] +#[derive(Subcommand)] +enum TuiCommand { + /// Launch the disk manager UI. + DiskManager, +} + +fn main() -> Result<()> { + let _ = tracing_subscriber::fmt::try_init(); + + let cli = Cli::parse(); + + match cli.command { + Some(Command::Workflow { workflow }) => run_workflow(workflow)?, + #[cfg(feature = "tui")] + Some(Command::Tui(cmd)) => run_tui(cmd)?, + None => { + #[cfg(feature = "tui")] + { + println!( + "No command specified. Launching disk manager TUI. Use `lpkg help` for more options." + ); + DiskManager::run_tui().map_err(|e| anyhow!(e.to_string()))?; + } + + #[cfg(not(feature = "tui"))] + { + Cli::command().print_help()?; + println!(); + } + } + } + + Ok(()) +} + +fn run_workflow(cmd: WorkflowCommand) -> Result<()> { + match cmd { + WorkflowCommand::EnvCheck { url } => { + let pre_blocks = html::fetch_pre_blocks(&url) + .with_context(|| format!("Fetching HTML `<pre>` blocks from {url}"))?; + + let mut ran_any = false; + let mut failures = Vec::new(); + + for (idx, block) in pre_blocks.iter().enumerate() { + if !(block.contains("ver_check") || block.contains("ver_kernel")) { + continue; + } + + ran_any = true; + println!("Running version checks from block #{idx}..."); + if !version_check::run_version_checks_from_block(block) { + failures.push(idx + 1); + } + } + + if !ran_any { + return Err(anyhow!( + "No ver_check or ver_kernel snippets found at {url}." + )); + } + + if !failures.is_empty() { + return Err(anyhow!("Version checks failed in block(s): {:?}", failures)); + } + + println!("All version checks passed πŸ‘"); + } + WorkflowCommand::FetchManifests { output } => { + let wget_list = wget_list::get_wget_list().context("Fetching wget-list")?; + let md5sums = md5_utils::get_md5sums().context("Fetching md5sums")?; + + println!("Fetched wget-list ({} bytes)", wget_list.len()); + println!("Fetched md5sums ({} bytes)", md5sums.len()); + + let target_dir = output.unwrap_or(std::env::current_dir()?); + fs::create_dir_all(&target_dir) + .with_context(|| format!("Creating output directory at {:?}", target_dir))?; + + let wget_path = target_dir.join("wget-list"); + let md5_path = target_dir.join("md5sums"); + + fs::write(&wget_path, wget_list).with_context(|| format!("Writing {wget_path:?}"))?; + fs::write(&md5_path, md5sums).with_context(|| format!("Writing {md5_path:?}"))?; + + println!("Saved artifacts to {:?} and {:?}", wget_path, md5_path); + } + WorkflowCommand::BuildBinutils { + url, + lfs_root, + target, + } => { + let runtime = tokio::runtime::Runtime::new().context("Creating async runtime")?; + runtime + .block_on(build_binutils_from_page(&url, &lfs_root, target)) + .map_err(|e| anyhow!("Building Binutils using instructions from {url}: {e}"))?; + + println!("Binutils workflow completed successfully"); + } + WorkflowCommand::ScaffoldPackage { + name, + version, + source, + md5, + configure_arg, + build_cmd, + install_cmd, + dependency, + enable_lto, + enable_pgo, + cflag, + ldflag, + profdata, + base, + } => { + let base_dir = if base.is_relative() { + env::current_dir() + .context("Resolving scaffold base directory")? 
+ .join(base) + } else { + base + }; + + let request = ScaffoldRequest { + name: name.clone(), + version: version.clone(), + source, + md5, + configure_args: configure_arg, + build_commands: build_cmd, + install_commands: install_cmd, + dependencies: dependency, + enable_lto, + enable_pgo, + cflags: cflag, + ldflags: ldflag, + profdata, + stage: None, + variant: None, + notes: None, + module_override: None, + }; + + let scaffold = scaffolder::scaffold_package(&base_dir, request) + .with_context(|| format!("Scaffolding package {name}"))?; + + let pool = db::establish_pool().context("Setting up package database")?; + db::upsert_package_via_pool(&pool, &scaffold.definition) + .with_context(|| format!("Persisting package metadata for {name}"))?; + + println!("Generated module: {:?}", scaffold.module_path); + println!( + "Remember to stage and commit as `{name}: init at {version}` after reviewing the template" + ); + } + WorkflowCommand::ImportMlfs { + dry_run, + limit, + base, + overwrite, + source_url, + } => { + let base_dir = if base.is_relative() { + env::current_dir() + .context("Resolving MLFS scaffold base directory")? + .join(base) + } else { + base + }; + + let mut records = mlfs::load_or_fetch_catalog(source_url.as_deref()) + .context("Loading MLFS catalogue")?; + records.sort_by(|a, b| a.name.cmp(&b.name).then(a.variant.cmp(&b.variant))); + + let mut seen = BTreeSet::new(); + let mut processed = 0usize; + let mut created = 0usize; + let mut skipped = Vec::new(); + + let pool = if dry_run { + None + } else { + Some(db::establish_pool().context("Setting up package database")?) + }; + + for record in records { + let module_alias = record.module_alias(); + if !seen.insert(module_alias.clone()) { + continue; + } + + if let Some(limit) = limit { + if processed >= limit { + break; + } + } + processed += 1; + + if dry_run { + println!( + "Would scaffold {:<18} {:<12} -> {}", + record.name, record.version, module_alias + ); + continue; + } + + let request = ScaffoldRequest { + name: record.name.clone(), + version: record.version.clone(), + source: None, + md5: None, + configure_args: Vec::new(), + build_commands: Vec::new(), + install_commands: Vec::new(), + dependencies: Vec::new(), + enable_lto: true, + enable_pgo: true, + cflags: Vec::new(), + ldflags: Vec::new(), + profdata: None, + stage: record.stage.clone(), + variant: record.variant.clone(), + notes: record.notes.clone(), + module_override: Some(module_alias.clone()), + }; + + match scaffolder::scaffold_package(&base_dir, request) { + Ok(result) => { + if let Some(pool) = &pool { + db::upsert_package_via_pool(pool, &result.definition).with_context( + || { + format!( + "Persisting MLFS package metadata for {} {}", + record.name, record.version + ) + }, + )?; + } + println!( + "Scaffolded {:<18} {:<12} -> {}", + record.name, record.version, module_alias + ); + created += 1; + } + Err(err) => { + let already_exists = + err.to_string().to_lowercase().contains("already exists"); + if already_exists && !overwrite { + skipped.push(module_alias); + } else { + return Err(err); + } + } + } + } + + if dry_run { + println!( + "Dry run complete. {} package definitions queued.", + processed + ); + } else { + println!( + "MLFS import complete. Created {} modules, skipped {} (already existed).", + created, + skipped.len() + ); + if !skipped.is_empty() { + println!( + "Skipped modules: {}", + skipped + .iter() + .take(10) + .cloned() + .collect::<Vec<_>>() + .join(", ") + ); + if skipped.len() > 10 { + println!("... 
and {} more", skipped.len() - 10); + } + } + } + } + } + + Ok(()) +} + +#[cfg(feature = "tui")] +fn run_tui(cmd: TuiCommand) -> Result<()> { + match cmd { + TuiCommand::DiskManager => { + DiskManager::run_tui().map_err(|e| anyhow!(e.to_string()))?; + } + } -fn main() -> Result<(), Box<dyn std::error::Error>> { - tui::disk_manager::DiskManager::run_tui()?; Ok(()) } diff --git a/src/pkgs/mlfs.rs b/src/pkgs/mlfs.rs new file mode 100644 index 0000000..214da30 --- /dev/null +++ b/src/pkgs/mlfs.rs @@ -0,0 +1,116 @@ +use std::borrow::Cow; + +use anyhow::{Context, Result, anyhow}; +use serde::{Deserialize, Serialize}; + +use crate::ingest::{BookKind, BookPackage, FetchOptions, lfs}; +use crate::pkgs::package::PackageDefinition; + +pub const DEFAULT_MLFS_BASE_URL: &str = "https://linuxfromscratch.org/~thomas/multilib-m32"; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MlfsPackageRecord { + pub name: String, + pub version: String, + pub chapter: Option<u32>, + pub section: Option<String>, + #[serde(default)] + pub stage: Option<String>, + #[serde(default)] + pub variant: Option<String>, + #[serde(default)] + pub notes: Option<String>, +} + +impl MlfsPackageRecord { + pub fn id(&self) -> String { + let mut id = self.name.replace('+', "plus"); + if let Some(variant) = &self.variant { + id.push('_'); + id.push_str(&variant.replace('-', "_")); + } + id + } + + pub fn module_alias(&self) -> String { + self.id() + .replace('.', "_") + .replace('/', "_") + .replace(' ', "_") + .to_lowercase() + } + + pub fn display_label(&self) -> Cow<'_, str> { + match (&self.section, &self.variant) { + (Some(section), Some(variant)) => Cow::from(format!("{} ({})", section, variant)), + (Some(section), None) => Cow::from(section.as_str()), + (None, Some(variant)) => Cow::from(variant.as_str()), + _ => Cow::from(self.name.as_str()), + } + } + + pub fn to_package_definition(&self) -> PackageDefinition { + let mut pkg = PackageDefinition::new(&self.name, &self.version); + if let Some(stage) = &self.stage { + pkg.optimizations + .cflags + .push(format!("-DLPKG_STAGE={}", stage.to_uppercase())); + } + if let Some(variant) = &self.variant { + pkg.optimizations + .cflags + .push(format!("-DLPKG_VARIANT={}", variant.to_uppercase())); + } + if let Some(notes) = &self.notes { + pkg.optimizations + .cflags + .push(format!("-DLPKG_NOTES={}", notes.replace(' ', "_"))); + } + pkg + } + + fn from_book_package(pkg: BookPackage) -> Option<Self> { + let version = pkg.version?; + Some(Self { + name: pkg.name, + version, + chapter: pkg.chapter, + section: pkg.section, + stage: pkg.stage, + variant: pkg.variant, + notes: pkg.notes, + }) + } +} + +pub fn fetch_catalog(base_url: &str) -> Result<Vec<MlfsPackageRecord>> { + let options = FetchOptions::new(base_url, BookKind::Mlfs); + let packages = lfs::fetch_book(&options)?; + let mut records = packages + .into_iter() + .filter_map(MlfsPackageRecord::from_book_package) + .collect::<Vec<_>>(); + if records.is_empty() { + return Err(anyhow!("No packages parsed from MLFS book at {base_url}.")); + } + records.sort_by(|a, b| a.name.cmp(&b.name).then(a.variant.cmp(&b.variant))); + Ok(records) +} + +pub fn load_cached_catalog() -> Result<Vec<MlfsPackageRecord>> { + let raw = include_str!("../../data/mlfs_ml-12.4-40-multilib.json"); + let records: Vec<MlfsPackageRecord> = + serde_json::from_str(raw).context("parsing cached MLFS package manifest")?; + Ok(records) +} + +pub fn load_or_fetch_catalog(base_url: Option<&str>) -> Result<Vec<MlfsPackageRecord>> { + let base = 
base_url.unwrap_or(DEFAULT_MLFS_BASE_URL); + match fetch_catalog(base) { + Ok(records) => Ok(records), + Err(err) => { + tracing::warn!("mlfs_fetch_error" = %err, "Falling back to cached MLFS package list"); + load_cached_catalog() + } + } +} diff --git a/src/pkgs/mod.rs b/src/pkgs/mod.rs index 9dacb43..90957bc 100644 --- a/src/pkgs/mod.rs +++ b/src/pkgs/mod.rs @@ -1 +1,4 @@ pub mod by_name; +pub mod mlfs; +pub mod package; +pub mod scaffolder; diff --git a/src/pkgs/package.rs b/src/pkgs/package.rs new file mode 100644 index 0000000..2513025 --- /dev/null +++ b/src/pkgs/package.rs @@ -0,0 +1,74 @@ +use serde::{Deserialize, Serialize}; + +/// High-level description of a package managed by LPKG. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PackageDefinition { + pub name: String, + pub version: String, + pub source: Option<String>, + pub md5: Option<String>, + pub configure_args: Vec<String>, + pub build_commands: Vec<String>, + pub install_commands: Vec<String>, + pub dependencies: Vec<String>, + pub optimizations: OptimizationSettings, +} + +impl PackageDefinition { + pub fn new(name: impl Into<String>, version: impl Into<String>) -> Self { + Self { + name: name.into(), + version: version.into(), + source: None, + md5: None, + configure_args: Vec::new(), + build_commands: Vec::new(), + install_commands: Vec::new(), + dependencies: Vec::new(), + optimizations: OptimizationSettings::default(), + } + } +} + +/// Tunable compiler and linker flags applied during package builds. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct OptimizationSettings { + pub enable_lto: bool, + pub enable_pgo: bool, + pub cflags: Vec<String>, + pub ldflags: Vec<String>, + pub profdata: Option<String>, +} + +impl Default for OptimizationSettings { + fn default() -> Self { + Self { + enable_lto: true, + enable_pgo: true, + cflags: vec![ + "-O3".to_string(), + "-flto".to_string(), + "-fprofile-generate".to_string(), + ], + ldflags: vec!["-flto".to_string(), "-fprofile-generate".to_string()], + profdata: None, + } + } +} + +impl OptimizationSettings { + /// Convenience helper for disabling instrumentation once profile data has been gathered. 
+ pub fn for_pgo_replay(profdata: impl Into<String>) -> Self { + Self { + enable_lto: true, + enable_pgo: true, + cflags: vec![ + "-O3".to_string(), + "-flto".to_string(), + "-fprofile-use".to_string(), + ], + ldflags: vec!["-flto".to_string(), "-fprofile-use".to_string()], + profdata: Some(profdata.into()), + } + } +} diff --git a/src/pkgs/scaffolder.rs b/src/pkgs/scaffolder.rs new file mode 100644 index 0000000..cc67ffc --- /dev/null +++ b/src/pkgs/scaffolder.rs @@ -0,0 +1,293 @@ +use std::fs::{self, OpenOptions}; +use std::io::Write; +use std::path::{Path, PathBuf}; + +use anyhow::{Context, Result, anyhow}; + +use crate::pkgs::package::{OptimizationSettings, PackageDefinition}; + +#[derive(Debug, Clone)] +pub struct ScaffoldRequest { + pub name: String, + pub version: String, + pub source: Option<String>, + pub md5: Option<String>, + pub configure_args: Vec<String>, + pub build_commands: Vec<String>, + pub install_commands: Vec<String>, + pub dependencies: Vec<String>, + pub enable_lto: bool, + pub enable_pgo: bool, + pub cflags: Vec<String>, + pub ldflags: Vec<String>, + pub profdata: Option<String>, + pub stage: Option<String>, + pub variant: Option<String>, + pub notes: Option<String>, + pub module_override: Option<String>, +} + +#[derive(Debug, Clone)] +pub struct ScaffoldResult { + pub module_path: PathBuf, + pub prefix_module: PathBuf, + pub by_name_module: PathBuf, + pub definition: PackageDefinition, +} + +pub fn scaffold_package( + base_dir: impl AsRef<Path>, + request: ScaffoldRequest, +) -> Result<ScaffoldResult> { + let base_dir = base_dir.as_ref(); + if !base_dir.ends_with("by_name") { + return Err(anyhow!("expected base directory ending with 'by_name'")); + } + + let module_source_name = request.module_override.as_deref().unwrap_or(&request.name); + let module_name = sanitize(module_source_name); + let prefix = prefix(&module_name); + + let prefix_dir = base_dir.join(&prefix); + fs::create_dir_all(&prefix_dir) + .with_context(|| format!("creating prefix directory {:?}", prefix_dir))?; + + let by_name_mod = base_dir.join("mod.rs"); + ensure_mod_entry(&by_name_mod, &prefix)?; + + let prefix_mod = prefix_dir.join("mod.rs"); + ensure_mod_entry(&prefix_mod, &module_name)?; + + let package_dir = prefix_dir.join(&module_name); + if package_dir.exists() { + return Err(anyhow!("package module {:?} already exists", package_dir)); + } + fs::create_dir_all(&package_dir) + .with_context(|| format!("creating package directory {:?}", package_dir))?; + + let module_path = package_dir.join("mod.rs"); + let definition = build_definition(&request); + let source = generate_module_source(&request, &definition); + fs::write(&module_path, source) + .with_context(|| format!("writing module source to {:?}", module_path))?; + + Ok(ScaffoldResult { + module_path, + prefix_module: prefix_mod, + by_name_module: by_name_mod, + definition, + }) +} + +fn ensure_mod_entry(path: &Path, module: &str) -> Result<()> { + let entry = format!("pub mod {};", module); + if path.exists() { + let contents = + fs::read_to_string(path).with_context(|| format!("reading module file {:?}", path))?; + if contents.contains(&entry) || contents.contains(&entry.trim()) { + return Ok(()); + } + let mut file = OpenOptions::new() + .append(true) + .open(path) + .with_context(|| format!("opening module file {:?}", path))?; + writeln!(file, "pub mod {};", module) + .with_context(|| format!("appending to module file {:?}", path))?; + } else { + fs::write(path, format!("pub mod {};\n", module)) + .with_context(|| format!("creating 
module file {:?}", path))?; + } + Ok(()) +} + +fn build_definition(request: &ScaffoldRequest) -> PackageDefinition { + let mut pkg = PackageDefinition::new(&request.name, &request.version); + pkg.source = request.source.clone(); + pkg.md5 = request.md5.clone(); + pkg.configure_args = request.configure_args.clone(); + pkg.build_commands = request.build_commands.clone(); + pkg.install_commands = request.install_commands.clone(); + pkg.dependencies = request.dependencies.clone(); + + let mut cflags = if request.cflags.is_empty() { + default_cflags(request) + } else { + request.cflags.clone() + }; + let mut ldflags = if request.ldflags.is_empty() { + default_ldflags(request) + } else { + request.ldflags.clone() + }; + dedup(&mut cflags); + dedup(&mut ldflags); + + let profdata = request.profdata.clone(); + let profdata_clone = profdata.clone(); + pkg.optimizations = match profdata_clone { + Some(path) => OptimizationSettings::for_pgo_replay(path), + None => OptimizationSettings::default(), + }; + pkg.optimizations.enable_lto = request.enable_lto; + pkg.optimizations.enable_pgo = request.enable_pgo; + pkg.optimizations.cflags = cflags; + pkg.optimizations.ldflags = ldflags; + pkg.optimizations.profdata = profdata; + + pkg +} + +fn default_cflags(request: &ScaffoldRequest) -> Vec<String> { + let mut flags = vec!["-O3".to_string(), "-flto".to_string()]; + if request.enable_pgo { + if request.profdata.is_some() { + flags.push("-fprofile-use".to_string()); + } else { + flags.push("-fprofile-generate".to_string()); + } + } + flags +} + +fn default_ldflags(request: &ScaffoldRequest) -> Vec<String> { + let mut flags = vec!["-flto".to_string()]; + if request.enable_pgo { + if request.profdata.is_some() { + flags.push("-fprofile-use".to_string()); + } else { + flags.push("-fprofile-generate".to_string()); + } + } + flags +} + +fn dedup(values: &mut Vec<String>) { + let mut seen = std::collections::BTreeSet::new(); + values.retain(|value| seen.insert(value.clone())); +} + +fn generate_module_source(request: &ScaffoldRequest, definition: &PackageDefinition) -> String { + let mut metadata = Vec::new(); + if let Some(stage) = &request.stage { + metadata.push(format!("stage: {}", stage)); + } + if let Some(variant) = &request.variant { + metadata.push(format!("variant: {}", variant)); + } + if let Some(notes) = &request.notes { + metadata.push(format!("notes: {}", notes)); + } + let metadata = if metadata.is_empty() { + String::new() + } else { + format!("// MLFS metadata: {}\n\n", metadata.join(", ")) + }; + let configure_args = format_vec(&definition.configure_args); + let build_commands = format_vec(&definition.build_commands); + let install_commands = format_vec(&definition.install_commands); + let dependencies = format_vec(&definition.dependencies); + let cflags = format_vec(&definition.optimizations.cflags); + let ldflags = format_vec(&definition.optimizations.ldflags); + let source = format_option(&definition.source); + let md5 = format_option(&definition.md5); + let profdata = format_option(&definition.optimizations.profdata); + + format!( + "{metadata}use crate::pkgs::package::{{OptimizationSettings, PackageDefinition}};\n\n\ + pub fn definition() -> PackageDefinition {{\n\ + let mut pkg = PackageDefinition::new(\"{name}\", \"{version}\");\n\ + pkg.source = {source};\n\ + pkg.md5 = {md5};\n\ + pkg.configure_args = {configure_args};\n\ + pkg.build_commands = {build_commands};\n\ + pkg.install_commands = {install_commands};\n\ + pkg.dependencies = {dependencies};\n\ + let profdata = {profdata};\n\ + 
let profdata_clone = profdata.clone();\n\ + pkg.optimizations = match profdata_clone {{\n\ + Some(path) => OptimizationSettings::for_pgo_replay(path),\n\ + None => OptimizationSettings::default(),\n\ + }};\n\ + pkg.optimizations.enable_lto = {enable_lto};\n\ + pkg.optimizations.enable_pgo = {enable_pgo};\n\ + pkg.optimizations.cflags = {cflags};\n\ + pkg.optimizations.ldflags = {ldflags};\n\ + pkg.optimizations.profdata = profdata;\n\ + pkg\n\ + }}\n", + metadata = metadata, + name = request.name, + version = request.version, + source = source, + md5 = md5, + configure_args = configure_args, + build_commands = build_commands, + install_commands = install_commands, + dependencies = dependencies, + profdata = profdata, + enable_lto = request.enable_lto, + enable_pgo = request.enable_pgo, + cflags = cflags, + ldflags = ldflags, + ) +} + +fn format_vec(values: &[String]) -> String { + if values.is_empty() { + "Vec::new()".to_string() + } else { + let items: Vec<String> = values + .iter() + .map(|v| format!("\"{}\".to_string()", escape(v))) + .collect(); + format!("vec![{}]", items.join(", ")) + } +} + +fn format_option(value: &Option<String>) -> String { + match value { + Some(v) => format!("Some(\"{}\".to_string())", escape(v)), + None => "None".to_string(), + } +} + +fn sanitize(name: &str) -> String { + let mut out = String::new(); + for ch in name.chars() { + if ch.is_ascii_alphanumeric() { + out.push(ch.to_ascii_lowercase()); + } else if ch == '_' || ch == '+' { + out.push('_'); + } else if ch == '-' { + out.push('_'); + } else { + out.push('_'); + } + } + if out.is_empty() { + out.push_str("pkg"); + } + if out + .chars() + .next() + .map(|c| c.is_ascii_digit()) + .unwrap_or(false) + { + out.insert(0, 'p'); + } + out +} + +fn prefix(module: &str) -> String { + let mut chars = module.chars(); + let first = chars.next().unwrap_or('p'); + let second = chars.next().unwrap_or('k'); + let mut s = String::new(); + s.push(first); + s.push(second); + s +} + +fn escape(input: &str) -> String { + input.replace('\\', "\\\\").replace('"', "\\\"") +} diff --git a/src/tui/disk_manager.rs b/src/tui/disk_manager.rs index 465c2d1..0a08278 100644 --- a/src/tui/disk_manager.rs +++ b/src/tui/disk_manager.rs @@ -1,7 +1,7 @@ // src/tui/disk_manager.rs use std::{ fs::{File, read_dir}, - io::{self, Seek, SeekFrom, Write}, + io::{self, Seek, SeekFrom}, path::PathBuf, }; @@ -186,12 +186,12 @@ impl DiskManager { }, }; - // Create list of lines to display: + // Create list of lines to display using public GPT API: let mut lines: Vec<String> = Vec::new(); lines.push(format!("Partitions on {}:", disk.display())); - for (i, entry_opt) in gpt.partitions.iter().enumerate() { - if let Some(entry) = entry_opt { - let name = entry.partition_name.to_string(); + for (i, entry) in gpt.iter() { + if entry.is_used() { + let name = entry.partition_name.as_str(); lines.push(format!( "{}: {} -> {} (type: {})", i, @@ -388,9 +388,9 @@ impl DiskManager { let sectors = (size_mb as u128 * 1024 * 1024 / 512) as u64; // choose starting LBA: find max ending_lba among existing partitions; align to 2048 let last_end = gpt - .partitions .iter() - .filter_map(|p| p.as_ref().map(|e| e.ending_lba)) + .filter(|(_, e)| e.is_used()) + .map(|(_, e)| e.ending_lba) .max() .unwrap_or(2048); let start = ((last_end + 2048) / 2048) * 2048 + 1; @@ -410,15 +410,15 @@ impl DiskManager { }; new_entry.partition_type_guid = type_guid; - // find first empty partition slot - let idx_opt = gpt.partitions.iter().position(|p| p.is_none()); + // find first empty 
partition slot (indexing is 1-based for gptman::GPT) + let idx_opt = gpt.iter().find(|(_, e)| e.is_unused()).map(|(i, _)| i); let idx = match idx_opt { Some(i) => i, None => return Err("No free GPT partition entries (maxed out)".into()), }; // assign and write - gpt.partitions[idx] = Some(new_entry); + gpt[idx] = new_entry; // Seek to start (important) file.seek(SeekFrom::Start(0))?; diff --git a/src/tui/main_menu.rs b/src/tui/main_menu.rs index 978d040..31051f1 100644 --- a/src/tui/main_menu.rs +++ b/src/tui/main_menu.rs @@ -1,7 +1,6 @@ use crate::tui::disk_manager::DiskManager; use crossterm::event::{self, Event, KeyCode}; use std::error::Error; -use std::io::Stdout; use tui::{ Terminal, backend::CrosstermBackend, @@ -11,7 +10,7 @@ use tui::{ }; pub fn show_main_menu() -> Result<(), Box<dyn Error>> { - let mut stdout = std::io::stdout(); + let stdout = std::io::stdout(); let backend = CrosstermBackend::new(stdout); let mut terminal = Terminal::new(backend)?; @@ -37,7 +36,7 @@ pub fn show_main_menu() -> Result<(), Box<dyn Error>> { if event::poll(std::time::Duration::from_millis(100))? { if let Event::Key(key) = event::read()? { match key.code { - KeyCode::Char('1') => DiskManager::show_disk_manager(&mut terminal)?, + KeyCode::Char('1') => DiskManager::run_tui()?, KeyCode::Char('0') => break, _ => {} } diff --git a/src/tui/settings.rs b/src/tui/settings.rs index d258dd8..8badd83 100644 --- a/src/tui/settings.rs +++ b/src/tui/settings.rs @@ -18,9 +18,9 @@ impl Theme { } impl Settings { - #[instrument(skip(terminal))] + #[instrument(skip(_terminal))] pub fn show_settings( - terminal: &mut Terminal<CrosstermBackend<Stdout>>, + _terminal: &mut Terminal<CrosstermBackend<Stdout>>, ) -> Result<(), Box<dyn std::error::Error>> { // Render settings UI here Ok(()) From 12e6d41e58b555db5271e4916f83262b5533dcc1 Mon Sep 17 00:00:00 2001 From: m00d <hellm00d@users.noreply.github.com> Date: Wed, 1 Oct 2025 07:04:29 +0200 Subject: [PATCH 03/10] Add metadata refresh command and jhalfs caching --- README.md | 9 +++++ ai/tasks.json | 7 ++++ docs/ARCHITECTURE.md | 7 ++-- docs/METADATA_PIPELINE.md | 22 +++++-------- src/bin/metadata_indexer.rs | 66 ++++++++++++++++++++++++++++++++++--- 5 files changed, 92 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 83de895..8eedfe7 100644 --- a/README.md +++ b/README.md @@ -120,6 +120,15 @@ cargo run --bin metadata_indexer -- \ --dry-run ``` +Keep the jhalfs manifests current with: + +```bash +cargo run --bin metadata_indexer -- --base-dir . refresh +``` + +Passing `--books mlfs,blfs` restricts the refresh to specific books, and +`--force` bypasses the local cache. 
+ ## πŸ“š Documentation - [Architecture Overview](docs/ARCHITECTURE.md) – high-level tour of the crate diff --git a/ai/tasks.json b/ai/tasks.json index abb56e2..c7fcb2f 100644 --- a/ai/tasks.json +++ b/ai/tasks.json @@ -51,6 +51,13 @@ "description": "Provide a standalone CLI to validate package metadata against the schema and regenerate ai/metadata/index.json.", "resolution": "Added src/bin/metadata_indexer.rs with schema validation, summary extraction, and index writer integration.", "owner": "default_cli" + }, + { + "id": "metadata-jhalfs-refresh", + "title": "Wire jhalfs manifests into metadata harvester", + "description": "Cache wget-list/md5sums from jhalfs and expose a CLI refresh command so harvesting can populate source URLs and checksums reliably.", + "resolution": "Extended metadata_indexer with a `refresh` subcommand, cached manifests under ai/metadata/cache/, and hooked harvest to populate MD5 checksums via jhalfs data.", + "owner": "default_cli" } ] } diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 19f9fb8..8fd1a96 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -42,8 +42,11 @@ artifacts under `ai/metadata/`: `ai/metadata/index.json` (use `--compact` for single-line JSON). - `harvest` – fetches a given book page, extracts build metadata, and emits a schema-compliant JSON skeleton. When direct HTML parsing does not locate the - source tarball, it falls back to the jhalfs `wget-list` data to populate - `source.urls`. + source tarball, it falls back to cached jhalfs manifests to populate + `source.urls` and MD5 checksums. +- `refresh` – downloads (or re-downloads with `--force`) the jhalfs manifests + (`wget-list`, `md5sums`) for one or more books and stores them under + `ai/metadata/cache/`. ## Module layout diff --git a/docs/METADATA_PIPELINE.md b/docs/METADATA_PIPELINE.md index 895031c..10ee366 100644 --- a/docs/METADATA_PIPELINE.md +++ b/docs/METADATA_PIPELINE.md @@ -19,7 +19,8 @@ This document explains the workflow and the supporting assets. | ------- | ----------- | | `validate` | Loads every package JSON file and validates it against `schema.json`. Reports schema violations and summary extraction errors. | | `index` | Re-runs validation and regenerates `index.json`. Use `--compact` to write a single-line JSON payload. | -| `harvest` | Fetches a book page, scrapes build instructions, and emits a draft metadata record (to stdout with `--dry-run` or into `ai/metadata/packages/`). | +| `harvest` | Fetches a book page, scrapes build instructions, and emits a draft metadata record (to stdout with `--dry-run` or into `ai/metadata/packages/`). Falls back to jhalfs manifests when inline source links are absent. | +| `refresh` | Updates cached jhalfs manifests (`wget-list`, `md5sums`) under `ai/metadata/cache/`. Supports `--books` filtering and `--force` to bypass the cache. | ### Harvesting flow @@ -32,11 +33,10 @@ This document explains the workflow and the supporting assets. 4. **Artifact stats** – `div.segmentedlist` entries supply SBU and disk usage. 5. **Source URLs** – the harvester tries two strategies: - Inline HTML links inside the page (common for BLFS articles). - - Fallback to the jhalfs `wget-list` for the selected book (currently MLFS) - using `package-management::wget_list::get_wget_list` to find matching - `<package>-<version>` entries. -6. **Checksums** – integration with the book’s `md5sums` mirror is pending; - placeholder wiring exists (`src/md5_utils.rs`). 
+ - Fallback to the cached jhalfs `wget-list` for the selected book to find + matching `<package>-<version>` entries. +6. **Checksums** – the matching entry from the cached jhalfs `md5sums` + manifest populates `source.checksums` when the archive name is known. 7. **Status** – unresolved items (missing URLs, anchors, etc.) are recorded in `status.issues` so humans can interrogate or patch the draft before promoting it. @@ -46,8 +46,6 @@ This document explains the workflow and the supporting assets. - **Source links via tables** – some MLFS chapters list download links inside a β€œPackage Information” table. The current implementation relies on the jhalfs `wget-list` fallback instead of parsing that table. -- **Checksums** – MD5 lookups from jhalfs are planned but not yet wired into - the harvest pipeline. - **Anchor discovery** – if the heading lacks an explicit `id` attribute, the scraper attempts to locate child anchors or scan the raw HTML. If none are found, a warning is recorded and `status.issues` contains a reminder. @@ -55,17 +53,15 @@ This document explains the workflow and the supporting assets. ## Using jhalfs manifests The maintained `wget-list`/`md5sums` files hosted by jhalfs provide canonical -source URLs and hashes. The helper modules `src/wget_list.rs` and -`src/md5_utils.rs` download these lists for the multilib LFS book. The -harvester currently consumes the wget-list as a fallback; integrating the -`md5sums` file will let us emit `source.checksums` automatically. +source URLs and hashes. The `metadata_indexer refresh` command keeps these +manifests cached under `ai/metadata/cache/`. Harvesting consumes the cached +copies to populate URLs and MD5 checksums. Planned enhancements (see `ai/notes.md` and `ai/bugs.json#metadata-harvest-no-source-urls`): 1. Abstract list fetching so BLFS/GLFS variants can reuse the logic. 2. Normalise the match criteria for package + version (handling pass stages, suffixes, etc.). -3. Populate checksum entries alongside URLs. 
## Manual review checklist diff --git a/src/bin/metadata_indexer.rs b/src/bin/metadata_indexer.rs index f058903..3ba638c 100644 --- a/src/bin/metadata_indexer.rs +++ b/src/bin/metadata_indexer.rs @@ -37,6 +37,15 @@ enum Command { #[arg(long)] compact: bool, }, + /// Refresh cached jhalfs manifests for the given book(s) + Refresh { + /// Books to refresh (defaults to all known books) + #[arg(long, value_delimiter = ',', default_value = "mlfs,lfs,blfs,glfs")] + books: Vec<String>, + /// Force re-download even if cache files exist + #[arg(long)] + force: bool, + }, /// Fetch and draft metadata for a specific package page Harvest { /// Book identifier (lfs, mlfs, blfs, glfs) @@ -181,6 +190,37 @@ fn main() -> Result<()> { ); } } + Command::Refresh { books, force } => { + let unique: HashSet<_> = books.into_iter().map(|b| b.to_lowercase()).collect(); + let mut refreshed = 0usize; + for book in unique { + for kind in [ManifestKind::WgetList, ManifestKind::Md5Sums] { + match refresh_manifest(&metadata_dir, &book, kind, force) { + Ok(path) => { + refreshed += 1; + println!( + "Refreshed {} manifest for {} -> {}", + kind.description(), + book, + path.display() + ); + } + Err(err) => { + eprintln!( + "warning: failed to refresh {} manifest for {}: {}", + kind.description(), + book, + err + ); + } + } + } + } + + if refreshed == 0 { + println!("No manifests refreshed (check warnings above)."); + } + } } Ok(()) @@ -697,6 +737,7 @@ struct SourceUrlEntry { kind: &'static str, } +#[derive(Clone, Copy)] enum ManifestKind { WgetList, Md5Sums, @@ -709,6 +750,13 @@ impl ManifestKind { ManifestKind::Md5Sums => "md5sums.txt", } } + + fn description(&self) -> &'static str { + match self { + ManifestKind::WgetList => "wget-list", + ManifestKind::Md5Sums => "md5sums", + } + } } fn collect_tarball_urls(page_url: &str, document: &Html) -> Vec<SourceUrlEntry> { @@ -824,14 +872,24 @@ fn resolve_checksums( } fn load_jhalfs_manifest(metadata_dir: &Path, book: &str, kind: ManifestKind) -> Result<String> { + let cache_path = refresh_manifest(metadata_dir, book, kind, false)?; + fs::read_to_string(&cache_path) + .with_context(|| format!("reading cached manifest {}", cache_path.display())) +} + +fn refresh_manifest( + metadata_dir: &Path, + book: &str, + kind: ManifestKind, + force: bool, +) -> Result<PathBuf> { let cache_dir = metadata_dir.join("cache"); fs::create_dir_all(&cache_dir) .with_context(|| format!("creating cache directory {}", cache_dir.display()))?; let cache_path = cache_dir.join(format!("{}-{}", book, kind.filename())); - if cache_path.exists() { - return fs::read_to_string(&cache_path) - .with_context(|| format!("reading cached manifest {}", cache_path.display())); + if cache_path.exists() && !force { + return Ok(cache_path); } let url = manifest_url(book, &kind) @@ -850,7 +908,7 @@ fn load_jhalfs_manifest(metadata_dir: &Path, book: &str, kind: ManifestKind) -> fs::write(&cache_path, &body) .with_context(|| format!("caching manifest {}", cache_path.display()))?; - Ok(body) + Ok(cache_path) } fn manifest_url(book: &str, kind: &ManifestKind) -> Option<&'static str> { From f890ae833256b7982501eaa9e55f1c6283a33fa1 Mon Sep 17 00:00:00 2001 From: m00d <hellm00d@users.noreply.github.com> Date: Wed, 1 Oct 2025 07:06:06 +0200 Subject: [PATCH 04/10] Add context snapshot and drop duplicate MLFS metadata --- ai/context.md | 19 +++ ai/metadata/packages/mlfs/binutils-pass1.json | 147 ------------------ 2 files changed, 19 insertions(+), 147 deletions(-) create mode 100644 ai/context.md delete mode 100644 
ai/metadata/packages/mlfs/binutils-pass1.json diff --git a/ai/context.md b/ai/context.md new file mode 100644 index 0000000..59111b9 --- /dev/null +++ b/ai/context.md @@ -0,0 +1,19 @@ +# Repository Context Snapshot + +- `README.md`, `docs/ARCHITECTURE.md`, and `docs/METADATA_PIPELINE.md` describe + the crate structure, CLI entry points, and metadata workflows. Consult them + first when revisiting the project. +- `metadata_indexer` now supports a `refresh` command that pulls jhalfs + `wget-list`/`md5sums` manifests into `ai/metadata/cache/` and the `harvest` + command automatically draws URLs and checksums from those manifests. +- AI state lives under `ai/`: + - `ai/personas.json`, `ai/tasks.json`, `ai/bugs.json` track personas, + outstanding work, and known issues. + - `ai/metadata/` stores package records plus the JSON schema. + - `ai/notes.md` captures ongoing research ideas (e.g., deeper BLFS/GLFS + manifest coverage). +- Duplicate MLFS metadata entries were pruned (`binutils-pass1.json` removed in + favour of the `binutils-pass-1.json` slug). + +This file is intended as a quick orientation checkpoint alongside the richer +architecture docs. diff --git a/ai/metadata/packages/mlfs/binutils-pass1.json b/ai/metadata/packages/mlfs/binutils-pass1.json deleted file mode 100644 index 303ca2d..0000000 --- a/ai/metadata/packages/mlfs/binutils-pass1.json +++ /dev/null @@ -1,147 +0,0 @@ -{ - "schema_version": "v0.1.0", - "package": { - "id": "mlfs/binutils/pass1", - "name": "Binutils", - "upstream": "gnu/binutils", - "version": "2.45", - "book": "mlfs", - "chapter": 5, - "section": "5.02", - "stage": "cross-toolchain", - "variant": "Pass 1", - "anchors": { - "section": "https://linuxfromscratch.org/~thomas/multilib-m32/chapter05/binutils-pass1.html#ch-tools-binutils-pass1" - } - }, - "source": { - "urls": [ - { - "url": "https://ftp.gnu.org/gnu/binutils/binutils-2.45.tar.xz", - "kind": "primary" - }, - { - "url": "https://ftpmirror.gnu.org/binutils/binutils-2.45.tar.xz", - "kind": "mirror" - } - ], - "archive": "binutils-2.45.tar.xz" - }, - "artifacts": { - "sbu": 1, - "disk": 678, - "install_prefix": "$LFS/tools" - }, - "dependencies": { - "build": [ - { "name": "bash" }, - { "name": "coreutils" }, - { "name": "gcc", "optional": true } - ], - "runtime": [] - }, - "environment": { - "variables": [ - { - "name": "LFS", - "description": "Absolute path to mounted LFS workspace" - }, - { - "name": "LFS_TGT", - "description": "Target triple for cross toolchain" - } - ], - "users": [] - }, - "build": [ - { - "phase": "setup", - "commands": [ - "tar -xf binutils-2.45.tar.xz", - "cd binutils-2.45", - "mkdir -v build", - "cd build" - ] - }, - { - "phase": "configure", - "commands": [ - "../configure --prefix=$LFS/tools \\", - " --with-sysroot=$LFS \\", - " --target=$LFS_TGT \\", - " --disable-nls \\", - " --enable-gprofng=no \\", - " --disable-werror \\", - " --enable-new-dtags \\", - " --enable-default-hash-style=gnu" - ], - "cwd": "build" - }, - { - "phase": "build", - "commands": [ - "make" - ], - "cwd": "build" - }, - { - "phase": "test", - "commands": [ - "make -k check" - ], - "cwd": "build", - "notes": "Tests are optional for cross-toolchain; failures can be ignored" - }, - { - "phase": "install", - "commands": [ - "make install" - ], - "cwd": "build" - } - ], - "optimizations": { - "enable_lto": true, - "enable_pgo": true, - "cflags": ["-O3", "-flto", "-fprofile-generate"], - "ldflags": ["-flto", "-fprofile-generate"], - "profdata": null - }, - "tests": [ - { - "commands": [ - "make -k check" - ], 
- "optional": true, - "expected_failures": ["gas/run/elf-x86-64-reloc.sh"] - } - ], - "post_install": [ - { - "commands": [ - "rm -v $LFS/tools/lib/libbfd.a", - "rm -v $LFS/tools/lib/libctf-nobfd.a" - ], - "description": "Remove static libraries per LFS guidance" - } - ], - "notes": [ - { - "severity": "warning", - "text": "Ensure the host uses recent flex/bison to avoid configure warnings." - } - ], - "provenance": { - "book_release": "ml-12.4-40-multilib", - "page_url": "https://linuxfromscratch.org/~thomas/multilib-m32/chapter05/binutils-pass1.html", - "retrieved_at": "2025-03-09T00:00:00Z", - "content_hash": "0000000000000000000000000000000000000000000000000000000000000000" - }, - "status": { - "state": "draft", - "issues": [ - "Checksums not yet verified", - "Dependency list requires confirmation" - ] - } -} From 0517c200b0e4d8d74ccc2ac1fd1304ac562356e1 Mon Sep 17 00:00:00 2001 From: m00d <hellm00d@users.noreply.github.com> Date: Wed, 1 Oct 2025 07:08:31 +0200 Subject: [PATCH 05/10] Allow null upstream in metadata schema --- ai/metadata/schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ai/metadata/schema.json b/ai/metadata/schema.json index 3fae44b..8405871 100644 --- a/ai/metadata/schema.json +++ b/ai/metadata/schema.json @@ -24,7 +24,7 @@ "minLength": 1 }, "upstream": { - "type": "string" + "type": ["string", "null"] }, "version": { "type": "string", From 54d76578e27c2ff1fc86bb28a93316ef361974b7 Mon Sep 17 00:00:00 2001 From: m00d <hellm00d@users.noreply.github.com> Date: Wed, 1 Oct 2025 07:11:02 +0200 Subject: [PATCH 06/10] Update tasks for metadata-to-Rust module pipeline --- ai/tasks.json | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/ai/tasks.json b/ai/tasks.json index c7fcb2f..86576dd 100644 --- a/ai/tasks.json +++ b/ai/tasks.json @@ -6,6 +6,7 @@ "title": "Import all MLFS packages into lpkg", "description": "Parse the Multilib LFS book and scaffold package definitions with optimization defaults (LTO/PGO/-O3).", "blocked_on": [ + "Finalize metadata -> Rust module generation pipeline", "Implement automated parser" ], "owner": "mlfs_researcher" @@ -28,6 +29,16 @@ "Implement incremental update workflow" ], "owner": "mlfs_researcher" + }, + { + "id": "rust-module-generator", + "title": "Generate package modules from harvested metadata", + "description": "Transform harvested metadata into Rust files under src/pkgs/by_name, wiring PackageDefinition data directly.", + "blocked_on": [ + "Define translation scheme from metadata to PackageDefinition", + "Integrate generator with metadata_indexer output" + ], + "owner": "default_cli" } ], "solved": [ From 205ab25d41623d4b97c7fbaa8c73003dd0ad080f Mon Sep 17 00:00:00 2001 From: m00d <hellm00d@users.noreply.github.com> Date: Wed, 1 Oct 2025 07:13:51 +0200 Subject: [PATCH 07/10] Sketch metadata-to-Rust generator plan --- ai/notes.md | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/ai/notes.md b/ai/notes.md index fd2e428..8bf1323 100644 --- a/ai/notes.md +++ b/ai/notes.md @@ -13,3 +13,34 @@ auto-fill checksums when harvesting metadata for MLFS/BLFS/GLFS packages. sources. - Benefits: avoids fragile HTML tables, keeps URLs aligned with official build scripts, and ensures checksums are up-to-date. + +# Metadata β†’ Rust Module Strategy + +Goal: emit Rust modules under `src/pkgs/by_name` directly from harvested +metadata once MLFS/BLFS/GLFS records are validated. + +Outline: +1. 
**Schema alignment** – Ensure harvested JSON carries everything the + `PackageDefinition` constructor expects (source URLs, checksums, build + commands, dependencies, optimisation flags, notes/stage metadata). +2. **Translation layer** – Implement a converter (likely in a new module, + e.g. `src/pkgs/generator.rs`) that reads a metadata JSON file and produces a + `ScaffoldRequest` or directly writes the module source via the existing + scaffolder. +3. **Naming/layout** – Derive module paths from `package.id` (e.g. + `mlfs/binutils-pass-1` β†’ `src/pkgs/by_name/bi/binutils/pass_1/mod.rs`) while + preserving the prefix/slug conventions already used by the scaffolder. +4. **CLI integration** – Add a subcommand (`metadata_indexer generate`) that + accepts a list of package IDs or a glob, feeds each through the translator, + and optionally stages the resulting Rust files. +5. **Diff safety** – Emit modules to a temporary location first, compare + against existing files, and only overwrite when changes are detected; keep a + `--dry-run` mode for review. +6. **Tests/checks** – After generation, run `cargo fmt` and `cargo check` to + ensure the new modules compile; optionally add schema fixtures covering edge + cases (variants, multiple URLs, absent checksums). + +Open questions: +- How to represent optional post-install steps or multi-phase builds inside the + generated module (additional helper functions vs. raw command arrays). +- Where to store PGO workload hints once the PGO infrastructure is defined. From c19c5c21ab994ad1c81e7cf63e3bbb96eb783d33 Mon Sep 17 00:00:00 2001 From: m00d <hellm00d@users.noreply.github.com> Date: Wed, 1 Oct 2025 07:26:20 +0200 Subject: [PATCH 08/10] Add metadata generator and CLI integration --- README.md | 11 ++ ai/context.md | 4 +- docs/METADATA_PIPELINE.md | 1 + src/bin/metadata_indexer.rs | 39 ++++++ src/pkgs/generator.rs | 236 ++++++++++++++++++++++++++++++++++++ src/pkgs/mod.rs | 1 + 6 files changed, 291 insertions(+), 1 deletion(-) create mode 100644 src/pkgs/generator.rs diff --git a/README.md b/README.md index 8eedfe7..745642a 100644 --- a/README.md +++ b/README.md @@ -129,6 +129,17 @@ cargo run --bin metadata_indexer -- --base-dir . refresh Passing `--books mlfs,blfs` restricts the refresh to specific books, and `--force` bypasses the local cache. +To materialise a Rust module from harvested metadata: + +```bash +cargo run --bin metadata_indexer -- \ + --base-dir . generate \ + --metadata ai/metadata/packages/mlfs/binutils-pass-1.json \ + --output target/generated/by_name +``` + +Add `--overwrite` to regenerate an existing module directory. + ## πŸ“š Documentation - [Architecture Overview](docs/ARCHITECTURE.md) – high-level tour of the crate diff --git a/ai/context.md b/ai/context.md index 59111b9..bf67ce1 100644 --- a/ai/context.md +++ b/ai/context.md @@ -5,7 +5,9 @@ first when revisiting the project. - `metadata_indexer` now supports a `refresh` command that pulls jhalfs `wget-list`/`md5sums` manifests into `ai/metadata/cache/` and the `harvest` - command automatically draws URLs and checksums from those manifests. + command automatically draws URLs and checksums from those manifests. A + `generate` subcommand consumes harvested metadata and scaffolds Rust modules + under `src/pkgs/by_name` (or a custom output directory). - AI state lives under `ai/`: - `ai/personas.json`, `ai/tasks.json`, `ai/bugs.json` track personas, outstanding work, and known issues. 
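For orientation alongside this commit, the `generate` flow it adds can also be driven from Rust rather than through the CLI; a minimal sketch, assuming the crate is consumed as `package_management` and using only the `generator` functions introduced here (the `regenerate` helper and the paths are illustrative, not part of the patch):

```rust
use std::path::Path;

use anyhow::Result;
use package_management::pkgs::generator;

/// Illustrative wrapper mirroring what the `Generate` subcommand does:
/// optionally clear the existing module directory, then scaffold it again.
fn regenerate(metadata: &Path, by_name_root: &Path, overwrite: bool) -> Result<()> {
    if overwrite {
        // module_directory() only derives the target path; it does not create it.
        let dir = generator::module_directory(metadata, by_name_root)?;
        if dir.exists() {
            std::fs::remove_dir_all(&dir)?;
        }
    }
    let module = generator::generate_module(metadata, by_name_root)?;
    println!("generated {}", module.display());
    Ok(())
}
```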
diff --git a/docs/METADATA_PIPELINE.md b/docs/METADATA_PIPELINE.md index 10ee366..dfc4197 100644 --- a/docs/METADATA_PIPELINE.md +++ b/docs/METADATA_PIPELINE.md @@ -21,6 +21,7 @@ This document explains the workflow and the supporting assets. | `index` | Re-runs validation and regenerates `index.json`. Use `--compact` to write a single-line JSON payload. | | `harvest` | Fetches a book page, scrapes build instructions, and emits a draft metadata record (to stdout with `--dry-run` or into `ai/metadata/packages/`). Falls back to jhalfs manifests when inline source links are absent. | | `refresh` | Updates cached jhalfs manifests (`wget-list`, `md5sums`) under `ai/metadata/cache/`. Supports `--books` filtering and `--force` to bypass the cache. | +| `generate` | Translates harvested metadata into Rust modules under `src/pkgs/by_name` (or a specified directory), using the scaffolder to create `PackageDefinition` wrappers. | ### Harvesting flow diff --git a/src/bin/metadata_indexer.rs b/src/bin/metadata_indexer.rs index 3ba638c..13fe78c 100644 --- a/src/bin/metadata_indexer.rs +++ b/src/bin/metadata_indexer.rs @@ -13,6 +13,8 @@ use serde_json::{Value, json}; use sha2::{Digest, Sha256}; use walkdir::WalkDir; +use package_management::pkgs::generator; + #[derive(Parser)] #[command( name = "metadata-indexer", @@ -64,6 +66,18 @@ enum Command { #[arg(long)] dry_run: bool, }, + /// Generate Rust modules from harvested metadata + Generate { + /// Path to the harvested metadata JSON file + #[arg(long)] + metadata: PathBuf, + /// Output directory (should be the `by_name` root) + #[arg(long, default_value = "src/pkgs/by_name")] + output: PathBuf, + /// Remove existing module directory before regeneration + #[arg(long)] + overwrite: bool, + }, } fn main() -> Result<()> { @@ -221,6 +235,31 @@ fn main() -> Result<()> { println!("No manifests refreshed (check warnings above)."); } } + Command::Generate { + metadata, + output, + overwrite, + } => { + if overwrite { + match generator::module_directory(&metadata, &output) { + Ok(dir) if dir.exists() => { + fs::remove_dir_all(&dir).with_context(|| { + format!("removing existing module {}", dir.display()) + })?; + } + Ok(_) => {} + Err(err) => { + eprintln!( + "warning: could not determine existing module directory: {}", + err + ); + } + } + } + + let module_path = generator::generate_module(&metadata, &output)?; + println!("Generated module at {}", module_path.display()); + } } Ok(()) diff --git a/src/pkgs/generator.rs b/src/pkgs/generator.rs new file mode 100644 index 0000000..a215b3c --- /dev/null +++ b/src/pkgs/generator.rs @@ -0,0 +1,236 @@ +use std::collections::HashSet; +use std::fs; +use std::path::{Path, PathBuf}; + +use anyhow::{Context, Result, anyhow}; +use serde::Deserialize; + +use crate::pkgs::scaffolder::{self, ScaffoldRequest}; + +#[derive(Debug, Deserialize)] +struct HarvestedPackage { + package: HarvestedMetadata, + source: HarvestedSource, + #[serde(default)] + build: Vec<CommandPhase>, + #[serde(default)] + dependencies: Option<HarvestedDependencies>, + optimizations: HarvestedOptimisations, +} + +#[derive(Debug, Deserialize)] +struct HarvestedMetadata { + id: String, + name: String, + version: String, + #[serde(default)] + stage: Option<String>, + #[serde(default)] + variant: Option<String>, + #[serde(default)] + notes: Option<String>, +} + +#[derive(Debug, Deserialize)] +struct HarvestedSource { + #[serde(default)] + archive: Option<String>, + #[serde(default)] + urls: Vec<HarvestedUrl>, + #[serde(default)] + checksums: Vec<HarvestedChecksum>, 
+} + +#[derive(Debug, Deserialize)] +struct HarvestedUrl { + url: String, +} + +#[derive(Debug, Deserialize)] +struct HarvestedChecksum { + alg: String, + value: String, +} + +#[derive(Debug, Deserialize)] +struct HarvestedOptimisations { + enable_lto: bool, + enable_pgo: bool, + #[serde(default)] + cflags: Vec<String>, + #[serde(default)] + ldflags: Vec<String>, + #[serde(default)] + profdata: Option<String>, +} + +#[derive(Debug, Deserialize)] +struct CommandPhase { + #[serde(default)] + phase: Option<String>, + #[serde(default)] + commands: Vec<String>, + #[serde(default)] + cwd: Option<String>, + #[serde(default)] + requires_root: Option<bool>, + #[serde(default)] + notes: Option<String>, +} + +#[derive(Debug, Deserialize)] +struct HarvestedDependencies { + #[serde(default)] + build: Vec<String>, + #[serde(default)] + runtime: Vec<String>, +} + +/// Generate a Rust module from harvested metadata, returning the path to the generated file. +pub fn generate_module( + metadata_path: impl AsRef<Path>, + base_dir: impl AsRef<Path>, +) -> Result<PathBuf> { + let harvested = parse_metadata(metadata_path.as_ref())?; + let request = build_request(&harvested)?; + let result = scaffolder::scaffold_package(base_dir.as_ref(), request)?; + Ok(result.module_path) +} + +/// Compute the directory for a module derived from the given metadata. +pub fn module_directory( + metadata_path: impl AsRef<Path>, + base_dir: impl AsRef<Path>, +) -> Result<PathBuf> { + let harvested = parse_metadata(metadata_path.as_ref())?; + let slug = module_override_from_id(&harvested.package.id).ok_or_else(|| { + anyhow!( + "unable to derive module slug from id '{}'", + harvested.package.id + ) + })?; + let module = sanitize_module_name(&slug); + let dir = base_dir + .as_ref() + .join(prefix_from_module(&module)) + .join(module); + Ok(dir) +} + +fn build_request(pkg: &HarvestedPackage) -> Result<ScaffoldRequest> { + let slug = module_override_from_id(&pkg.package.id) + .ok_or_else(|| anyhow!("unable to derive module slug from id '{}'", pkg.package.id))?; + + let mut build_commands = Vec::new(); + let mut install_commands = Vec::new(); + for command in flatten_commands(&pkg.build) { + if command.contains("make install") { + install_commands.push(command); + } else { + build_commands.push(command); + } + } + + let mut dependencies = HashSet::new(); + if let Some(deps) = &pkg.dependencies { + for dep in &deps.build { + dependencies.insert(dep.clone()); + } + for dep in &deps.runtime { + dependencies.insert(dep.clone()); + } + } + let mut dependencies: Vec<String> = dependencies.into_iter().collect(); + dependencies.sort(); + + let request = ScaffoldRequest { + name: pkg.package.name.clone(), + version: pkg.package.version.clone(), + source: pkg.source.urls.first().map(|u| u.url.clone()), + md5: pkg + .source + .checksums + .iter() + .find(|c| c.alg.eq_ignore_ascii_case("md5")) + .map(|c| c.value.clone()), + configure_args: Vec::new(), + build_commands, + install_commands, + dependencies, + enable_lto: pkg.optimizations.enable_lto, + enable_pgo: pkg.optimizations.enable_pgo, + cflags: pkg.optimizations.cflags.clone(), + ldflags: pkg.optimizations.ldflags.clone(), + profdata: pkg.optimizations.profdata.clone(), + stage: pkg.package.stage.clone(), + variant: pkg.package.variant.clone(), + notes: pkg.package.notes.clone(), + module_override: Some(slug), + }; + + Ok(request) +} + +fn flatten_commands(phases: &[CommandPhase]) -> Vec<String> { + phases + .iter() + .flat_map(|phase| phase.commands.iter().cloned()) + .collect() +} + +fn 
module_override_from_id(id: &str) -> Option<String> { + let slug = match id.split_once('/') { + Some((_, slug)) => slug, + None => id, + }; + Some( + slug.replace('.', "_") + .replace('/', "_") + .replace('-', "_") + .replace(' ', "_") + .to_lowercase(), + ) +} + +fn parse_metadata(path: &Path) -> Result<HarvestedPackage> { + let metadata = fs::read_to_string(path) + .with_context(|| format!("reading metadata file {}", path.display()))?; + let harvested: HarvestedPackage = serde_json::from_str(&metadata) + .with_context(|| format!("parsing harvested metadata from {}", path.display()))?; + Ok(harvested) +} + +fn sanitize_module_name(name: &str) -> String { + let mut out = String::new(); + for ch in name.chars() { + if ch.is_ascii_alphanumeric() { + out.push(ch.to_ascii_lowercase()); + } else if ch == '_' || ch == '+' || ch == '-' { + out.push('_'); + } else { + out.push('_'); + } + } + if out.is_empty() { + out.push_str("pkg"); + } + if out + .chars() + .next() + .map(|c| c.is_ascii_digit()) + .unwrap_or(false) + { + out.insert(0, 'p'); + } + out +} + +fn prefix_from_module(module: &str) -> String { + let mut chars = module.chars(); + let first = chars.next().unwrap_or('p'); + let second = chars.next().unwrap_or('k'); + let mut s = String::new(); + s.push(first); + s.push(second); + s +} diff --git a/src/pkgs/mod.rs b/src/pkgs/mod.rs index 90957bc..d7c40a7 100644 --- a/src/pkgs/mod.rs +++ b/src/pkgs/mod.rs @@ -1,4 +1,5 @@ pub mod by_name; +pub mod generator; pub mod mlfs; pub mod package; pub mod scaffolder; From 93fca09be1d5140de466bdc908e5f38310b6ea5a Mon Sep 17 00:00:00 2001 From: m00d <hellm00d@users.noreply.github.com> Date: Wed, 1 Oct 2025 07:28:41 +0200 Subject: [PATCH 09/10] Document package generation workflow --- README.md | 2 ++ ai/context.md | 3 +- docs/ARCHITECTURE.md | 2 +- docs/PACKAGE_GENERATION.md | 61 ++++++++++++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+), 2 deletions(-) create mode 100644 docs/PACKAGE_GENERATION.md diff --git a/README.md b/README.md index 745642a..e239454 100644 --- a/README.md +++ b/README.md @@ -146,6 +146,8 @@ Add `--overwrite` to regenerate an existing module directory. layout, binaries, and supporting modules. - [Metadata Harvesting Pipeline](docs/METADATA_PIPELINE.md) – how the metadata indexer produces and validates the JSON records under `ai/metadata/`. +- [Package Module Generation](docs/PACKAGE_GENERATION.md) – end-to-end guide + for converting harvested metadata into Rust modules under `src/pkgs/by_name/`. - `ai/notes.md` – scratchpad for ongoing research tasks (e.g., deeper jhalfs integration). diff --git a/ai/context.md b/ai/context.md index bf67ce1..f3f35c1 100644 --- a/ai/context.md +++ b/ai/context.md @@ -7,7 +7,8 @@ `wget-list`/`md5sums` manifests into `ai/metadata/cache/` and the `harvest` command automatically draws URLs and checksums from those manifests. A `generate` subcommand consumes harvested metadata and scaffolds Rust modules - under `src/pkgs/by_name` (or a custom output directory). + under `src/pkgs/by_name` (or a custom output directory). See + `docs/PACKAGE_GENERATION.md` for the CLI flow. - AI state lives under `ai/`: - `ai/personas.json`, `ai/tasks.json`, `ai/bugs.json` track personas, outstanding work, and known issues. diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 8fd1a96..3af7875 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -9,7 +9,7 @@ outline the main entry points and how the supporting modules fit together. 
| Binary | Location | Purpose | | ------ | -------- | ------- | | `lpkg` | `src/main.rs` | Primary command-line interface with workflow automation and optional TUI integration. | -| `metadata_indexer` | `src/bin/metadata_indexer.rs` | Harvests LFS/BLFS/GLFS package metadata, validates it against the JSON schema, and keeps `ai/metadata/index.json` up to date. | +| `metadata_indexer` | `src/bin/metadata_indexer.rs` | Harvests LFS/BLFS/GLFS package metadata, validates it against the JSON schema, keeps `ai/metadata/index.json` up to date, and can scaffold Rust modules from harvested records. | ### `lpkg` workflows diff --git a/docs/PACKAGE_GENERATION.md b/docs/PACKAGE_GENERATION.md new file mode 100644 index 0000000..39279c9 --- /dev/null +++ b/docs/PACKAGE_GENERATION.md @@ -0,0 +1,61 @@ +# Package Module Generation + +This document explains how harvested metadata is transformed into concrete +Rust modules under `src/pkgs/by_name/`. + +## Overview + +1. **Harvest metadata** – Use `metadata_indexer harvest` to capture package data + from the LFS/BLFS/GLFS books. Each record is written to + `ai/metadata/packages/<book>/<slug>.json`. +2. **Refresh manifests** – Run + `metadata_indexer refresh` to ensure the jhalfs `wget-list` and `md5sums` + caches are up to date. Harvesting relies on these caches for canonical + source URLs and checksums. +3. **Generate modules** – Use + `metadata_indexer generate --metadata <path> --output <by_name_dir>` to turn a + metadata file into a full Rust module that exposes a `PackageDefinition`. + +Generated modules leverage the existing scaffolder logic, so the command will +create any missing prefix directories (e.g. `bi/mod.rs`) and populate the final +`mod.rs` file with the correct code template. + +## Command reference + +```bash +# Harvest metadata from a book page +cargo run --bin metadata_indexer -- --base-dir . harvest \ + --book mlfs \ + --page chapter05/binutils-pass1 \ + --output ai/metadata/packages/mlfs/binutils-pass-1.json + +# Refresh jhalfs manifests (optional but recommended) +cargo run --bin metadata_indexer -- --base-dir . refresh + +# Generate a module under the standard src tree +cargo run --bin metadata_indexer -- --base-dir . generate \ + --metadata ai/metadata/packages/mlfs/binutils-pass-1.json \ + --output src/pkgs/by_name \ + --overwrite +``` + +### Flags + +- `--output` defaults to `src/pkgs/by_name`. Point it to another directory if + you want to stage modules elsewhere (e.g. `target/generated/by_name`). +- `--overwrite` deletes the existing module directory before scaffolding a new + one. + +After generation, run `cargo fmt` and `cargo check` to ensure the crate compiles +with the new modules. + +## Implementation notes + +- Metadata fields such as `build`, `dependencies`, and `optimizations` are + mapped directly onto the scaffolder’s `ScaffoldRequest` type. +- Source URLs and MD5 checksums are sourced from the harvested metadata + (populated via the jhalfs manifests). +- The module slug is derived from `package.id` (e.g. + `mlfs/binutils-pass-1` β†’ `src/pkgs/by_name/bi/binutils_pass_1/mod.rs`). + +See the code in `src/pkgs/generator.rs` for the full translation logic. 
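The slug rules documented above can be checked with a short, self-contained sketch; this is a simplified rendering of `module_override_from_id`/`prefix_from_module` from `src/pkgs/generator.rs` and omits the generator's extra handling of empty slugs and leading digits:

```rust
/// Map a harvested package id to the module path the scaffolder will create,
/// e.g. "mlfs/binutils-pass-1" -> "src/pkgs/by_name/bi/binutils_pass_1/mod.rs".
fn module_path_for(id: &str) -> String {
    // Drop the book prefix ("mlfs/"), then normalise the remaining slug.
    let slug = id.split_once('/').map(|(_, s)| s).unwrap_or(id);
    let module: String = slug
        .chars()
        .map(|c| if c.is_ascii_alphanumeric() { c.to_ascii_lowercase() } else { '_' })
        .collect();
    let prefix: String = module.chars().take(2).collect();
    format!("src/pkgs/by_name/{}/{}/mod.rs", prefix, module)
}

fn main() {
    assert_eq!(
        module_path_for("mlfs/binutils-pass-1"),
        "src/pkgs/by_name/bi/binutils_pass_1/mod.rs"
    );
    println!("{}", module_path_for("mlfs/gcc-pass-1"));
}
```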
From 3f74a55897c2cf0597657fc89946f76f81456041 Mon Sep 17 00:00:00 2001 From: m00d <hellm00d@users.noreply.github.com> Date: Wed, 1 Oct 2025 08:05:17 +0200 Subject: [PATCH 10/10] meow --- ai/metadata/cache/lfs-md5sums.txt | 93 ++++++ ai/metadata/cache/lfs-wget-list.txt | 95 ++++++ ai/metadata/index.json | 36 ++- ai/metadata/packages/mlfs/gcc-pass-1.json | 174 +++++++++++ ai/metadata/packages/mlfs/glibc.json | 289 +++++++++++++++++++ ai/metadata/packages/mlfs/linux-headers.json | 81 ++++++ src/pkgs/by_name/bi/binutils_pass_1/mod.rs | 38 +++ src/pkgs/by_name/bi/mod.rs | 1 + src/pkgs/by_name/gc/gcc_pass_1/mod.rs | 66 +++++ src/pkgs/by_name/gc/mod.rs | 1 + src/pkgs/by_name/gl/glibc/mod.rs | 74 +++++ src/pkgs/by_name/gl/mod.rs | 1 + src/pkgs/by_name/li/linux/mod.rs | 30 ++ src/pkgs/by_name/li/mod.rs | 1 + src/pkgs/by_name/mod.rs | 3 + 15 files changed, 980 insertions(+), 3 deletions(-) create mode 100644 ai/metadata/cache/lfs-md5sums.txt create mode 100644 ai/metadata/cache/lfs-wget-list.txt create mode 100644 ai/metadata/packages/mlfs/gcc-pass-1.json create mode 100644 ai/metadata/packages/mlfs/glibc.json create mode 100644 ai/metadata/packages/mlfs/linux-headers.json create mode 100644 src/pkgs/by_name/bi/binutils_pass_1/mod.rs create mode 100644 src/pkgs/by_name/gc/gcc_pass_1/mod.rs create mode 100644 src/pkgs/by_name/gc/mod.rs create mode 100644 src/pkgs/by_name/gl/glibc/mod.rs create mode 100644 src/pkgs/by_name/gl/mod.rs create mode 100644 src/pkgs/by_name/li/linux/mod.rs create mode 100644 src/pkgs/by_name/li/mod.rs diff --git a/ai/metadata/cache/lfs-md5sums.txt b/ai/metadata/cache/lfs-md5sums.txt new file mode 100644 index 0000000..6eab539 --- /dev/null +++ b/ai/metadata/cache/lfs-md5sums.txt @@ -0,0 +1,93 @@ +590765dee95907dbc3c856f7255bd669 acl-2.3.2.tar.xz +227043ec2f6ca03c0948df5517f9c927 attr-2.5.2.tar.gz +1be79f7106ab6767f18391c5e22be701 autoconf-2.72.tar.xz +4017e96f89fca45ca946f1c5db6be714 automake-1.16.5.tar.xz +ad5b38410e3bf0e9bcc20e2765f5e3f9 bash-5.2.21.tar.gz +e249b1f86f886d6fb71c15f72b65dd3d bc-6.7.5.tar.xz +a075178a9646551379bfb64040487715 binutils-2.42.tar.xz +c28f119f405a2304ff0a7ccdcc629713 bison-3.8.2.tar.xz +67e051268d0c475ea773822f7500d0e5 bzip2-1.0.8.tar.gz +50fcafcecde5a380415b12e9c574e0b2 check-0.15.2.tar.gz +459e9546074db2834eefe5421f250025 coreutils-9.4.tar.xz +68c5208c58236eba447d7d6d1326b821 dejagnu-1.6.3.tar.gz +2745c50f6f4e395e7b7d52f902d075bf diffutils-3.10.tar.xz +6b4f18a33873623041857b4963641ee9 e2fsprogs-1.47.0.tar.gz +79ad698e61a052bea79e77df6a08bc4b elfutils-0.190.tar.bz2 +bd169cb11f4b9bdfddadf9e88a5c4d4b expat-2.6.0.tar.xz +00fce8de158422f5ccd2666512329bd2 expect5.45.4.tar.gz +26b2a96d4e3a8938827a1e572afd527a file-5.45.tar.gz +4a4a547e888a944b2f3af31d789a1137 findutils-4.9.0.tar.xz +2882e3179748cc9f9c23ec593d6adc8d flex-2.6.4.tar.gz +3bc52f1952b9a78361114147da63c35b flit_core-3.9.0.tar.gz +97c5a7d83f91a7e1b2035ebbe6ac7abd gawk-5.3.0.tar.xz +e0e48554cc6e4f261d55ddee9ab69075 gcc-13.2.0.tar.xz +8551961e36bf8c70b7500d255d3658ec gdbm-1.23.tar.gz +2d8507d003ef3ddd1c172707ffa97ed8 gettext-0.22.4.tar.xz +be81e87f72b5ea2c0ffe2bedfeb680c6 glibc-2.39.tar.xz +956dc04e864001a9c22429f761f2c283 gmp-6.3.0.tar.xz +9e251c0a618ad0824b51117d5d9db87e gperf-3.1.tar.gz +7c9bbd74492131245f7cdb291fa142c0 grep-3.11.tar.xz +5e4f40315a22bb8a158748e7d5094c7d groff-1.23.0.tar.gz +60c564b1bdc39d8e43b3aab4bc0fb140 grub-2.12.tar.xz +d5c9fc9441288817a4a0be2da0249e29 gzip-1.13.tar.xz +aed66d04de615d76c70890233081e584 iana-etc-20240125.tar.gz +9e5a6dfd2d794dc056a770e8ad4a9263 
inetutils-2.5.tar.xz +12e517cac2b57a0121cda351570f1e63 intltool-0.51.0.tar.gz +35d8277d1469596b7edc07a51470a033 iproute2-6.7.0.tar.xz +caf5418c851eac59e70a78d9730d4cea Jinja2-3.1.3.tar.gz +e2fd7adccf6b1e98eb1ae8d5a1ce5762 kbd-2.6.4.tar.xz +6165867e1836d51795a11ea4762ff66a kmod-31.tar.xz +cf05e2546a3729492b944b4874dd43dd less-643.tar.gz +a236eaa9a1f699bc3fb6ab2acd7e7b6c lfs-bootscripts-20230728.tar.xz +4667bacb837f9ac4adb4a1a0266f4b65 libcap-2.69.tar.xz +0da1a5ed7786ac12dcbaf0d499d8a049 libffi-3.4.4.tar.gz +1a48b5771b9f6c790fb4efdb1ac71342 libpipeline-1.5.7.tar.gz +2fc0b6ddcd66a89ed6e45db28fa44232 libtool-2.4.7.tar.xz +b84cd4104e08c975063ec6c4d0372446 libxcrypt-4.4.36.tar.xz +370e1b6155ae63133380e421146619e0 linux-6.7.4.tar.xz +0d90823e1426f1da2fd872df0311298d m4-1.4.19.tar.xz +c8469a3713cbbe04d955d4ae4be23eeb make-4.4.1.tar.gz +67e0052fa200901b314fad7b68c9db27 man-db-2.12.0.tar.xz +26b39e38248144156d437e1e10cb20bf man-pages-6.06.tar.xz +8fe7227653f2fb9b1ffe7f9f2058998a MarkupSafe-2.1.5.tar.gz +2d0ebd3a24249617b1c4d30026380cf8 meson-1.3.2.tar.gz +5c9bc658c9fd0f940e8e3e0f09530c62 mpc-1.3.1.tar.gz +523c50c6318dde6f9dc523bc0244690a mpfr-4.2.1.tar.xz +c5367e829b6d9f3f97b280bb3e6bfbc3 ncurses-6.4-20230520.tar.xz +32151c08211d7ca3c1d832064f6939b0 ninja-1.11.1.tar.gz +c239213887804ba00654884918b37441 openssl-3.2.1.tar.gz +78ad9937e4caadcba1526ef1853730d5 patch-2.7.6.tar.xz +d3957d75042918a23ec0abac4a2b7e0a perl-5.38.2.tar.xz +bc29d74c2483197deb9f1f3b414b7918 pkgconf-2.1.1.tar.xz +2f747fc7df8ccf402d03e375c565cf96 procps-ng-4.0.4.tar.xz +ed3206da1184ce9e82d607dc56c52633 psmisc-23.6.tar.xz +e7c178b97bf8f7ccd677b94d614f7b3c Python-3.12.2.tar.xz +8a6310f6288e7f60c3565277ec3b5279 python-3.12.2-docs-html.tar.bz2 +4aa1b31be779e6b84f9a96cb66bc50f6 readline-8.2.tar.gz +6aac9b2dbafcd5b7a67a8a9bcb8036c3 sed-4.9.tar.xz +6f6eb780ce12c90d81ce243747ed7ab0 setuptools-69.1.0.tar.gz +452b0e59f08bf618482228ba3732d0ae shadow-4.14.5.tar.xz +c70599ab0d037fde724f7210c2c8d7f8 sysklogd-1.5.1.tar.gz +521cda27409a9edf0370c128fae3e690 systemd-255.tar.gz +1ebe54d7a80f9abf8f2d14ddfeb2432d systemd-man-pages-255.tar.xz +81a05f28d7b67533cfc778fcadea168c sysvinit-3.08.tar.xz +a2d8042658cfd8ea939e6d911eaf4152 tar-1.35.tar.xz +0e4358aade2f5db8a8b6f2f6d9481ec2 tcl8.6.13-src.tar.gz +4452f2f6d557f5598cca17b786d6eb68 tcl8.6.13-html.tar.gz +edd9928b4a3f82674bcc3551616eef3b texinfo-7.1.tar.xz +2349edd8335245525cc082f2755d5bf4 tzdata2024a.tar.gz +acd4360d8a5c3ef320b9db88d275dae6 udev-lfs-20230818.tar.xz +f3591e6970c017bb4bcd24ae762a98f5 util-linux-2.39.3.tar.xz +79dfe62be5d347b1325cbd5ce2a1f9b3 vim-9.1.0041.tar.gz +802ad6e5f9336fcb1c76b7593f0cd22d wheel-0.42.0.tar.gz +89a8e82cfd2ad948b349c0a69c494463 XML-Parser-2.47.tar.gz +7ade7bd1181a731328f875bec62a9377 xz-5.4.6.tar.xz +9855b6d802d7fe5b7bd5b196a2271655 zlib-1.3.1.tar.gz +63251602329a106220e0a5ad26ba656f zstd-1.5.5.tar.gz +2d1691a629c558e894dbb78ee6bf34ef bash-5.2.21-upstream_fixes-1.patch +6a5ac7e89b791aae556de0f745916f7f bzip2-1.0.8-install_docs-1.patch +cca7dc8c73147444e77bc45d210229bb coreutils-9.4-i18n-1.patch +9a5997c3452909b1769918c759eff8a2 glibc-2.39-fhs-1.patch +f75cca16a38da6caa7d52151f7136895 kbd-2.6.4-backspace-1.patch +9ed497b6cb8adcb8dbda9dee9ebce791 readline-8.2-upstream_fixes-3.patch +17ffccbb8e18c39e8cedc32046f3a475 sysvinit-3.08-consolidated-1.patch diff --git a/ai/metadata/cache/lfs-wget-list.txt b/ai/metadata/cache/lfs-wget-list.txt new file mode 100644 index 0000000..459b251 --- /dev/null +++ b/ai/metadata/cache/lfs-wget-list.txt @@ -0,0 +1,95 @@ 
+https://download.savannah.gnu.org/releases/acl/acl-2.3.2.tar.xz +https://download.savannah.gnu.org/releases/attr/attr-2.5.2.tar.gz +https://ftp.gnu.org/gnu/autoconf/autoconf-2.72.tar.xz +https://ftp.gnu.org/gnu/automake/automake-1.16.5.tar.xz +https://ftp.gnu.org/gnu/bash/bash-5.2.21.tar.gz +https://github.com/gavinhoward/bc/releases/download/6.7.5/bc-6.7.5.tar.xz +https://sourceware.org/pub/binutils/releases/binutils-2.42.tar.xz +https://ftp.gnu.org/gnu/bison/bison-3.8.2.tar.xz +https://www.sourceware.org/pub/bzip2/bzip2-1.0.8.tar.gz +https://github.com/libcheck/check/releases/download/0.15.2/check-0.15.2.tar.gz +https://ftp.gnu.org/gnu/coreutils/coreutils-9.4.tar.xz +https://dbus.freedesktop.org/releases/dbus/dbus-1.14.10.tar.xz +https://ftp.gnu.org/gnu/dejagnu/dejagnu-1.6.3.tar.gz +https://ftp.gnu.org/gnu/diffutils/diffutils-3.10.tar.xz +https://downloads.sourceforge.net/project/e2fsprogs/e2fsprogs/v1.47.0/e2fsprogs-1.47.0.tar.gz +https://sourceware.org/ftp/elfutils/0.190/elfutils-0.190.tar.bz2 +https://prdownloads.sourceforge.net/expat/expat-2.6.0.tar.xz +https://prdownloads.sourceforge.net/expect/expect5.45.4.tar.gz +https://astron.com/pub/file/file-5.45.tar.gz +https://ftp.gnu.org/gnu/findutils/findutils-4.9.0.tar.xz +https://github.com/westes/flex/releases/download/v2.6.4/flex-2.6.4.tar.gz +https://pypi.org/packages/source/f/flit-core/flit_core-3.9.0.tar.gz +https://ftp.gnu.org/gnu/gawk/gawk-5.3.0.tar.xz +https://ftp.gnu.org/gnu/gcc/gcc-13.2.0/gcc-13.2.0.tar.xz +https://ftp.gnu.org/gnu/gdbm/gdbm-1.23.tar.gz +https://ftp.gnu.org/gnu/gettext/gettext-0.22.4.tar.xz +https://ftp.gnu.org/gnu/glibc/glibc-2.39.tar.xz +https://ftp.gnu.org/gnu/gmp/gmp-6.3.0.tar.xz +https://ftp.gnu.org/gnu/gperf/gperf-3.1.tar.gz +https://ftp.gnu.org/gnu/grep/grep-3.11.tar.xz +https://ftp.gnu.org/gnu/groff/groff-1.23.0.tar.gz +https://ftp.gnu.org/gnu/grub/grub-2.12.tar.xz +https://ftp.gnu.org/gnu/gzip/gzip-1.13.tar.xz +https://github.com/Mic92/iana-etc/releases/download/20240125/iana-etc-20240125.tar.gz +https://ftp.gnu.org/gnu/inetutils/inetutils-2.5.tar.xz +https://launchpad.net/intltool/trunk/0.51.0/+download/intltool-0.51.0.tar.gz +https://www.kernel.org/pub/linux/utils/net/iproute2/iproute2-6.7.0.tar.xz +https://pypi.org/packages/source/J/Jinja2/Jinja2-3.1.3.tar.gz +https://www.kernel.org/pub/linux/utils/kbd/kbd-2.6.4.tar.xz +https://www.kernel.org/pub/linux/utils/kernel/kmod/kmod-31.tar.xz +https://www.greenwoodsoftware.com/less/less-643.tar.gz +https://www.linuxfromscratch.org/lfs/downloads/12.1/lfs-bootscripts-20230728.tar.xz +https://www.kernel.org/pub/linux/libs/security/linux-privs/libcap2/libcap-2.69.tar.xz +https://github.com/libffi/libffi/releases/download/v3.4.4/libffi-3.4.4.tar.gz +https://download.savannah.gnu.org/releases/libpipeline/libpipeline-1.5.7.tar.gz +https://ftp.gnu.org/gnu/libtool/libtool-2.4.7.tar.xz +https://github.com/besser82/libxcrypt/releases/download/v4.4.36/libxcrypt-4.4.36.tar.xz +https://www.kernel.org/pub/linux/kernel/v6.x/linux-6.7.4.tar.xz +https://ftp.gnu.org/gnu/m4/m4-1.4.19.tar.xz +https://ftp.gnu.org/gnu/make/make-4.4.1.tar.gz +https://download.savannah.gnu.org/releases/man-db/man-db-2.12.0.tar.xz +https://www.kernel.org/pub/linux/docs/man-pages/man-pages-6.06.tar.xz +https://pypi.org/packages/source/M/MarkupSafe/MarkupSafe-2.1.5.tar.gz +https://github.com/mesonbuild/meson/releases/download/1.3.2/meson-1.3.2.tar.gz +https://ftp.gnu.org/gnu/mpc/mpc-1.3.1.tar.gz +https://ftp.gnu.org/gnu/mpfr/mpfr-4.2.1.tar.xz 
+https://anduin.linuxfromscratch.org/LFS/ncurses-6.4-20230520.tar.xz +https://github.com/ninja-build/ninja/archive/v1.11.1/ninja-1.11.1.tar.gz +https://www.openssl.org/source/openssl-3.2.1.tar.gz +https://ftp.gnu.org/gnu/patch/patch-2.7.6.tar.xz +https://www.cpan.org/src/5.0/perl-5.38.2.tar.xz +https://distfiles.ariadne.space/pkgconf/pkgconf-2.1.1.tar.xz +https://sourceforge.net/projects/procps-ng/files/Production/procps-ng-4.0.4.tar.xz +https://sourceforge.net/projects/psmisc/files/psmisc/psmisc-23.6.tar.xz +https://www.python.org/ftp/python/3.12.2/Python-3.12.2.tar.xz +https://www.python.org/ftp/python/doc/3.12.2/python-3.12.2-docs-html.tar.bz2 +https://ftp.gnu.org/gnu/readline/readline-8.2.tar.gz +https://ftp.gnu.org/gnu/sed/sed-4.9.tar.xz +https://pypi.org/packages/source/s/setuptools/setuptools-69.1.0.tar.gz +https://github.com/shadow-maint/shadow/releases/download/4.14.5/shadow-4.14.5.tar.xz +https://www.infodrom.org/projects/sysklogd/download/sysklogd-1.5.1.tar.gz +https://github.com/systemd/systemd/archive/v255/systemd-255.tar.gz +https://anduin.linuxfromscratch.org/LFS/systemd-man-pages-255.tar.xz +https://github.com/slicer69/sysvinit/releases/download/3.08/sysvinit-3.08.tar.xz +https://ftp.gnu.org/gnu/tar/tar-1.35.tar.xz +https://downloads.sourceforge.net/tcl/tcl8.6.13-src.tar.gz +https://downloads.sourceforge.net/tcl/tcl8.6.13-html.tar.gz +https://ftp.gnu.org/gnu/texinfo/texinfo-7.1.tar.xz +https://www.iana.org/time-zones/repository/releases/tzdata2024a.tar.gz +https://anduin.linuxfromscratch.org/LFS/udev-lfs-20230818.tar.xz +https://www.kernel.org/pub/linux/utils/util-linux/v2.39/util-linux-2.39.3.tar.xz +https://github.com/vim/vim/archive/v9.1.0041/vim-9.1.0041.tar.gz +https://pypi.org/packages/source/w/wheel/wheel-0.42.0.tar.gz +https://cpan.metacpan.org/authors/id/T/TO/TODDR/XML-Parser-2.47.tar.gz +https://github.com/tukaani-project/xz/releases/download/v5.4.6/xz-5.4.6.tar.xz +https://zlib.net/fossils/zlib-1.3.1.tar.gz +https://github.com/facebook/zstd/releases/download/v1.5.5/zstd-1.5.5.tar.gz +https://www.linuxfromscratch.org/patches/lfs/12.1/bash-5.2.21-upstream_fixes-1.patch +https://www.linuxfromscratch.org/patches/lfs/12.1/bzip2-1.0.8-install_docs-1.patch +https://www.linuxfromscratch.org/patches/lfs/12.1/coreutils-9.4-i18n-1.patch +https://www.linuxfromscratch.org/patches/lfs/12.1/glibc-2.39-fhs-1.patch +https://www.linuxfromscratch.org/patches/lfs/12.1/kbd-2.6.4-backspace-1.patch +https://www.linuxfromscratch.org/patches/lfs/12.1/readline-8.2-upstream_fixes-3.patch +https://www.linuxfromscratch.org/patches/lfs/12.1/sysvinit-3.08-consolidated-1.patch +https://www.linuxfromscratch.org/patches/lfs/12.1/systemd-255-upstream_fixes-1.patch diff --git a/ai/metadata/index.json b/ai/metadata/index.json index 684b1d8..0245bc5 100644 --- a/ai/metadata/index.json +++ b/ai/metadata/index.json @@ -1,15 +1,45 @@ { - "generated_at": "2025-10-01T04:35:27.106227+00:00", + "generated_at": "2025-10-01T05:54:56.228701+00:00", "packages": [ { "book": "mlfs", - "id": "mlfs/binutils/pass1", + "id": "mlfs/linux", + "name": "Linux", + "path": "packages/mlfs/linux-headers.json", + "stage": "cross-toolchain", + "status": "draft", + "variant": null, + "version": "6.16.9 API Headers" + }, + { + "book": "mlfs", + "id": "mlfs/glibc", + "name": "Glibc", + "path": "packages/mlfs/glibc.json", + "stage": "cross-toolchain", + "status": "draft", + "variant": null, + "version": "2.42" + }, + { + "book": "mlfs", + "id": "mlfs/binutils-pass-1", "name": "Binutils", - "path": 
"packages/mlfs/binutils-pass1.json", + "path": "packages/mlfs/binutils-pass-1.json", "stage": "cross-toolchain", "status": "draft", "variant": "Pass 1", "version": "2.45" + }, + { + "book": "mlfs", + "id": "mlfs/gcc-pass-1", + "name": "GCC", + "path": "packages/mlfs/gcc-pass-1.json", + "stage": "cross-toolchain", + "status": "draft", + "variant": "Pass 1", + "version": "15.2.0" } ], "schema_version": "v0.1.0" diff --git a/ai/metadata/packages/mlfs/gcc-pass-1.json b/ai/metadata/packages/mlfs/gcc-pass-1.json new file mode 100644 index 0000000..2c06010 --- /dev/null +++ b/ai/metadata/packages/mlfs/gcc-pass-1.json @@ -0,0 +1,174 @@ +{ + "artifacts": { + "disk": 5, + "install_prefix": null, + "sbu": 3.0 + }, + "build": [ + { + "commands": [ + "tar -xf ../mpfr-4.2.2.tar.xz", + "mv -v mpfr-4.2.2 mpfr", + "tar -xf ../gmp-6.3.0.tar.xz", + "mv -v gmp-6.3.0 gmp", + "tar -xf ../mpc-1.3.1.tar.gz", + "mv -v mpc-1.3.1 mpc" + ], + "cwd": null, + "notes": null, + "phase": "setup", + "requires_root": false + }, + { + "commands": [ + "sed -e '/m64=/s/lib64/lib/' \\", + "-e '/m32=/s/m32=.*/m32=..\\/lib32$(call if_multiarch,:i386-linux-gnu)/' \\", + "-i.orig gcc/config/i386/t-linux64" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + }, + { + "commands": [ + "sed '/STACK_REALIGN_DEFAULT/s/0/(!TARGET_64BIT \\&\\& TARGET_SSE)/' \\", + "-i gcc/config/i386/i386.h" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + }, + { + "commands": [ + "mkdir -v build", + "cd build" + ], + "cwd": null, + "notes": null, + "phase": "setup", + "requires_root": false + }, + { + "commands": [ + "mlist=m64,m32", + "../configure \\", + "--target=$LFS_TGT \\", + "--prefix=$LFS/tools \\", + "--with-glibc-version=2.42 \\", + "--with-sysroot=$LFS \\", + "--with-newlib \\", + "--without-headers \\", + "--enable-default-pie \\", + "--enable-default-ssp \\", + "--enable-initfini-array \\", + "--disable-nls \\", + "--disable-shared \\", + "--enable-multilib --with-multilib-list=$mlist \\", + "--disable-decimal-float \\", + "--disable-threads \\", + "--disable-libatomic \\", + "--disable-libgomp \\", + "--disable-libquadmath \\", + "--disable-libssp \\", + "--disable-libvtv \\", + "--disable-libstdcxx \\", + "--enable-languages=c,c++" + ], + "cwd": null, + "notes": null, + "phase": "configure", + "requires_root": false + }, + { + "commands": [ + "make" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + }, + { + "commands": [ + "make install" + ], + "cwd": null, + "notes": null, + "phase": "install", + "requires_root": false + }, + { + "commands": [ + "cd ..", + "cat gcc/limitx.h gcc/glimits.h gcc/limity.h > \\", + "`dirname $($LFS_TGT-gcc -print-libgcc-file-name)`/include/limits.h" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + } + ], + "dependencies": { + "build": [], + "runtime": [] + }, + "environment": { + "users": [], + "variables": [] + }, + "optimizations": { + "cflags": [ + "-O3", + "-flto" + ], + "enable_lto": true, + "enable_pgo": true, + "ldflags": [ + "-flto" + ], + "profdata": null + }, + "package": { + "anchors": { + "section": "https://linuxfromscratch.org/~thomas/multilib-m32/chapter05/gcc-pass1.html#ch-tools-gcc-pass1" + }, + "book": "mlfs", + "chapter": 5, + "id": "mlfs/gcc-pass-1", + "name": "GCC", + "section": "5.3", + "stage": "cross-toolchain", + "upstream": null, + "variant": "Pass 1", + "version": "15.2.0" + }, + "provenance": { + "book_release": "lfs-ml-12.4-40-multilib", + 
"content_hash": "439fb0bf6a99414e9ddf1301e603948e073d5df2dcf897ac5581c399bd99f045", + "page_url": "https://linuxfromscratch.org/~thomas/multilib-m32/chapter05/gcc-pass1.html", + "retrieved_at": "2025-10-01T05:30:05.164831+00:00" + }, + "schema_version": "v0.1.0", + "source": { + "archive": "mpfr-4.2.2.tar.xz", + "checksums": [ + { + "alg": "md5", + "value": "7c32c39b8b6e3ae85f25156228156061" + } + ], + "urls": [ + { + "kind": "primary", + "url": "https://ftp.gnu.org/gnu/gcc/gcc-15.2.0/gcc-15.2.0.tar.xz" + } + ] + }, + "status": { + "issues": [], + "state": "draft" + } +} \ No newline at end of file diff --git a/ai/metadata/packages/mlfs/glibc.json b/ai/metadata/packages/mlfs/glibc.json new file mode 100644 index 0000000..1f45180 --- /dev/null +++ b/ai/metadata/packages/mlfs/glibc.json @@ -0,0 +1,289 @@ +{ + "artifacts": { + "disk": 870, + "install_prefix": null, + "sbu": 1.0 + }, + "build": [ + { + "commands": [ + "ln -sfv ../lib/ld-linux-x86-64.so.2 $LFS/lib64", + "ln -sfv ../lib/ld-linux-x86-64.so.2 $LFS/lib64/ld-lsb-x86-64.so.3" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + }, + { + "commands": [ + "patch -Np1 -i ../glibc-2.42-fhs-1.patch" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + }, + { + "commands": [ + "mkdir -v build", + "cd build" + ], + "cwd": null, + "notes": null, + "phase": "setup", + "requires_root": false + }, + { + "commands": [ + "echo \"rootsbindir=/usr/sbin\" > configparms" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + }, + { + "commands": [ + "../configure \\", + "--prefix=/usr \\", + "--host=$LFS_TGT \\", + "--build=$(../scripts/config.guess) \\", + "--disable-nscd \\", + "libc_cv_slibdir=/usr/lib \\", + "--enable-kernel=5.4" + ], + "cwd": null, + "notes": null, + "phase": "configure", + "requires_root": false + }, + { + "commands": [ + "make" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + }, + { + "commands": [ + "make DESTDIR=$LFS install" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + }, + { + "commands": [ + "sed '/RTLDLIST=/s@/usr@@g' -i $LFS/usr/bin/ldd" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + }, + { + "commands": [ + "echo 'int main(){}' | $LFS_TGT-gcc -x c - -v -Wl,--verbose &> dummy.log", + "readelf -l a.out | grep ': /lib'" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + }, + { + "commands": [ + "grep -E -o \"$LFS/lib.*/S?crt[1in].*succeeded\" dummy.log" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + }, + { + "commands": [ + "grep -B3 \"^ $LFS/usr/include\" dummy.log" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + }, + { + "commands": [ + "grep 'SEARCH.*/usr/lib' dummy.log |sed 's|; |\\n|g'" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + }, + { + "commands": [ + "grep \"/lib.*/libc.so.6 \" dummy.log" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + }, + { + "commands": [ + "grep found dummy.log" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + }, + { + "commands": [ + "rm -v a.out dummy.log" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + }, + { + "commands": [ + "make clean", + "find .. 
-name \"*.a\" -delete" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + }, + { + "commands": [ + "CC=\"$LFS_TGT-gcc -m32\" \\", + "CXX=\"$LFS_TGT-g++ -m32\" \\", + "../configure \\", + "--prefix=/usr \\", + "--host=$LFS_TGT32 \\", + "--build=$(../scripts/config.guess) \\", + "--disable-nscd \\", + "--with-headers=$LFS/usr/include \\", + "--libdir=/usr/lib32 \\", + "--libexecdir=/usr/lib32 \\", + "libc_cv_slibdir=/usr/lib32 \\", + "--enable-kernel=5.4" + ], + "cwd": null, + "notes": null, + "phase": "configure", + "requires_root": false + }, + { + "commands": [ + "make" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + }, + { + "commands": [ + "make DESTDIR=$PWD/DESTDIR install", + "cp -a DESTDIR/usr/lib32 $LFS/usr/", + "install -vm644 DESTDIR/usr/include/gnu/{lib-names,stubs}-32.h \\", + "$LFS/usr/include/gnu/", + "ln -svf ../lib32/ld-linux.so.2 $LFS/lib/ld-linux.so.2" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + }, + { + "commands": [ + "echo 'int main(){}' > dummy.c", + "$LFS_TGT-gcc -m32 dummy.c", + "readelf -l a.out | grep '/ld-linux'" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + }, + { + "commands": [ + "rm -v dummy.c a.out" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + } + ], + "dependencies": { + "build": [], + "runtime": [] + }, + "environment": { + "users": [], + "variables": [] + }, + "optimizations": { + "cflags": [ + "-O3", + "-flto" + ], + "enable_lto": true, + "enable_pgo": true, + "ldflags": [ + "-flto" + ], + "profdata": null + }, + "package": { + "anchors": { + "section": "https://linuxfromscratch.org/~thomas/multilib-m32/chapter05/glibc.html#ch-tools-glibc" + }, + "book": "mlfs", + "chapter": 5, + "id": "mlfs/glibc", + "name": "Glibc", + "section": "5.5", + "stage": "cross-toolchain", + "upstream": null, + "variant": null, + "version": "2.42" + }, + "provenance": { + "book_release": "lfs-ml-12.4-40-multilib", + "content_hash": "52b9b51a37f960c3d8694d06cfd1a8ef297a3a2e473e565f402765b24c4e2329", + "page_url": "https://linuxfromscratch.org/~thomas/multilib-m32/chapter05/glibc.html", + "retrieved_at": "2025-10-01T05:30:28.195848+00:00" + }, + "schema_version": "v0.1.0", + "source": { + "archive": "glibc-2.42.tar.xz", + "checksums": [ + { + "alg": "md5", + "value": "23c6f5a27932b435cae94e087cb8b1f5" + } + ], + "urls": [ + { + "kind": "primary", + "url": "https://ftp.gnu.org/gnu/glibc/glibc-2.42.tar.xz" + }, + { + "kind": "primary", + "url": "https://www.linuxfromscratch.org/patches/lfs/development/glibc-2.42-fhs-1.patch" + } + ] + }, + "status": { + "issues": [], + "state": "draft" + } +} \ No newline at end of file diff --git a/ai/metadata/packages/mlfs/linux-headers.json b/ai/metadata/packages/mlfs/linux-headers.json new file mode 100644 index 0000000..4eaed2e --- /dev/null +++ b/ai/metadata/packages/mlfs/linux-headers.json @@ -0,0 +1,81 @@ +{ + "artifacts": { + "disk": 1, + "install_prefix": null, + "sbu": 0.0 + }, + "build": [ + { + "commands": [ + "make mrproper" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + }, + { + "commands": [ + "make headers", + "find usr/include -type f ! 
-name '*.h' -delete", + "cp -rv usr/include $LFS/usr" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + } + ], + "dependencies": { + "build": [], + "runtime": [] + }, + "environment": { + "users": [], + "variables": [] + }, + "optimizations": { + "cflags": [ + "-O3", + "-flto" + ], + "enable_lto": true, + "enable_pgo": true, + "ldflags": [ + "-flto" + ], + "profdata": null + }, + "package": { + "anchors": { + "section": "https://linuxfromscratch.org/~thomas/multilib-m32/chapter05/linux-headers.html#ch-tools-linux-headers" + }, + "book": "mlfs", + "chapter": 5, + "id": "mlfs/linux", + "name": "Linux", + "section": "5.4", + "stage": "cross-toolchain", + "upstream": null, + "variant": null, + "version": "6.16.9 API Headers" + }, + "provenance": { + "book_release": "lfs-ml-12.4-40-multilib", + "content_hash": "cd251fbfaaa5da1eb43185331f5beaa07cdd9d50c79f19be266435781195b66d", + "page_url": "https://linuxfromscratch.org/~thomas/multilib-m32/chapter05/linux-headers.html", + "retrieved_at": "2025-10-01T05:30:14.291785+00:00" + }, + "schema_version": "v0.1.0", + "source": { + "archive": null, + "checksums": [], + "urls": [] + }, + "status": { + "issues": [ + "No source URLs with archive extensions detected" + ], + "state": "draft" + } +} \ No newline at end of file diff --git a/src/pkgs/by_name/bi/binutils_pass_1/mod.rs b/src/pkgs/by_name/bi/binutils_pass_1/mod.rs new file mode 100644 index 0000000..3724fb1 --- /dev/null +++ b/src/pkgs/by_name/bi/binutils_pass_1/mod.rs @@ -0,0 +1,38 @@ +// MLFS metadata: stage: cross-toolchain, variant: Pass 1 + +use crate::pkgs::package::{OptimizationSettings, PackageDefinition}; + +pub fn definition() -> PackageDefinition { + let mut pkg = PackageDefinition::new("Binutils", "2.45"); + pkg.source = + Some("https://sourceware.org/pub/binutils/releases/binutils-2.45.tar.xz".to_string()); + pkg.md5 = Some("dee5b4267e0305a99a3c9d6131f45759".to_string()); + pkg.configure_args = Vec::new(); + pkg.build_commands = vec![ + "mkdir -v build".to_string(), + "cd build".to_string(), + "../configure --prefix=$LFS/tools \\".to_string(), + "--with-sysroot=$LFS \\".to_string(), + "--target=$LFS_TGT \\".to_string(), + "--disable-nls \\".to_string(), + "--enable-gprofng=no \\".to_string(), + "--disable-werror \\".to_string(), + "--enable-new-dtags \\".to_string(), + "--enable-default-hash-style=gnu".to_string(), + "make".to_string(), + ]; + pkg.install_commands = vec!["make install".to_string()]; + pkg.dependencies = Vec::new(); + let profdata = None; + let profdata_clone = profdata.clone(); + pkg.optimizations = match profdata_clone { + Some(path) => OptimizationSettings::for_pgo_replay(path), + None => OptimizationSettings::default(), + }; + pkg.optimizations.enable_lto = true; + pkg.optimizations.enable_pgo = true; + pkg.optimizations.cflags = vec!["-O3".to_string(), "-flto".to_string()]; + pkg.optimizations.ldflags = vec!["-flto".to_string()]; + pkg.optimizations.profdata = profdata; + pkg +} diff --git a/src/pkgs/by_name/bi/mod.rs b/src/pkgs/by_name/bi/mod.rs index 3148c5b..af583cc 100644 --- a/src/pkgs/by_name/bi/mod.rs +++ b/src/pkgs/by_name/bi/mod.rs @@ -1 +1,2 @@ pub mod binutils; +pub mod binutils_pass_1; diff --git a/src/pkgs/by_name/gc/gcc_pass_1/mod.rs b/src/pkgs/by_name/gc/gcc_pass_1/mod.rs new file mode 100644 index 0000000..0d1f0b9 --- /dev/null +++ b/src/pkgs/by_name/gc/gcc_pass_1/mod.rs @@ -0,0 +1,66 @@ +// MLFS metadata: stage: cross-toolchain, variant: Pass 1 + +use crate::pkgs::package::{OptimizationSettings, 
PackageDefinition}; + +pub fn definition() -> PackageDefinition { + let mut pkg = PackageDefinition::new("GCC", "15.2.0"); + pkg.source = Some("https://ftp.gnu.org/gnu/gcc/gcc-15.2.0/gcc-15.2.0.tar.xz".to_string()); + pkg.md5 = Some("7c32c39b8b6e3ae85f25156228156061".to_string()); + pkg.configure_args = Vec::new(); + pkg.build_commands = vec![ + "tar -xf ../mpfr-4.2.2.tar.xz".to_string(), + "mv -v mpfr-4.2.2 mpfr".to_string(), + "tar -xf ../gmp-6.3.0.tar.xz".to_string(), + "mv -v gmp-6.3.0 gmp".to_string(), + "tar -xf ../mpc-1.3.1.tar.gz".to_string(), + "mv -v mpc-1.3.1 mpc".to_string(), + "sed -e '/m64=/s/lib64/lib/' \\".to_string(), + "-e '/m32=/s/m32=.*/m32=..\\/lib32$(call if_multiarch,:i386-linux-gnu)/' \\".to_string(), + "-i.orig gcc/config/i386/t-linux64".to_string(), + "sed '/STACK_REALIGN_DEFAULT/s/0/(!TARGET_64BIT \\&\\& TARGET_SSE)/' \\".to_string(), + "-i gcc/config/i386/i386.h".to_string(), + "mkdir -v build".to_string(), + "cd build".to_string(), + "mlist=m64,m32".to_string(), + "../configure \\".to_string(), + "--target=$LFS_TGT \\".to_string(), + "--prefix=$LFS/tools \\".to_string(), + "--with-glibc-version=2.42 \\".to_string(), + "--with-sysroot=$LFS \\".to_string(), + "--with-newlib \\".to_string(), + "--without-headers \\".to_string(), + "--enable-default-pie \\".to_string(), + "--enable-default-ssp \\".to_string(), + "--enable-initfini-array \\".to_string(), + "--disable-nls \\".to_string(), + "--disable-shared \\".to_string(), + "--enable-multilib --with-multilib-list=$mlist \\".to_string(), + "--disable-decimal-float \\".to_string(), + "--disable-threads \\".to_string(), + "--disable-libatomic \\".to_string(), + "--disable-libgomp \\".to_string(), + "--disable-libquadmath \\".to_string(), + "--disable-libssp \\".to_string(), + "--disable-libvtv \\".to_string(), + "--disable-libstdcxx \\".to_string(), + "--enable-languages=c,c++".to_string(), + "make".to_string(), + "cd ..".to_string(), + "cat gcc/limitx.h gcc/glimits.h gcc/limity.h > \\".to_string(), + "`dirname $($LFS_TGT-gcc -print-libgcc-file-name)`/include/limits.h".to_string(), + ]; + pkg.install_commands = vec!["make install".to_string()]; + pkg.dependencies = Vec::new(); + let profdata = None; + let profdata_clone = profdata.clone(); + pkg.optimizations = match profdata_clone { + Some(path) => OptimizationSettings::for_pgo_replay(path), + None => OptimizationSettings::default(), + }; + pkg.optimizations.enable_lto = true; + pkg.optimizations.enable_pgo = true; + pkg.optimizations.cflags = vec!["-O3".to_string(), "-flto".to_string()]; + pkg.optimizations.ldflags = vec!["-flto".to_string()]; + pkg.optimizations.profdata = profdata; + pkg +} diff --git a/src/pkgs/by_name/gc/mod.rs b/src/pkgs/by_name/gc/mod.rs new file mode 100644 index 0000000..e6d0006 --- /dev/null +++ b/src/pkgs/by_name/gc/mod.rs @@ -0,0 +1 @@ +pub mod gcc_pass_1; diff --git a/src/pkgs/by_name/gl/glibc/mod.rs b/src/pkgs/by_name/gl/glibc/mod.rs new file mode 100644 index 0000000..59b863b --- /dev/null +++ b/src/pkgs/by_name/gl/glibc/mod.rs @@ -0,0 +1,74 @@ +// MLFS metadata: stage: cross-toolchain + +use crate::pkgs::package::{OptimizationSettings, PackageDefinition}; + +pub fn definition() -> PackageDefinition { + let mut pkg = PackageDefinition::new("Glibc", "2.42"); + pkg.source = Some("https://ftp.gnu.org/gnu/glibc/glibc-2.42.tar.xz".to_string()); + pkg.md5 = Some("23c6f5a27932b435cae94e087cb8b1f5".to_string()); + pkg.configure_args = Vec::new(); + pkg.build_commands = vec![ + "ln -sfv ../lib/ld-linux-x86-64.so.2 $LFS/lib64".to_string(), + 
"ln -sfv ../lib/ld-linux-x86-64.so.2 $LFS/lib64/ld-lsb-x86-64.so.3".to_string(), + "patch -Np1 -i ../glibc-2.42-fhs-1.patch".to_string(), + "mkdir -v build".to_string(), + "cd build".to_string(), + "echo \"rootsbindir=/usr/sbin\" > configparms".to_string(), + "../configure \\".to_string(), + "--prefix=/usr \\".to_string(), + "--host=$LFS_TGT \\".to_string(), + "--build=$(../scripts/config.guess) \\".to_string(), + "--disable-nscd \\".to_string(), + "libc_cv_slibdir=/usr/lib \\".to_string(), + "--enable-kernel=5.4".to_string(), + "make".to_string(), + "make DESTDIR=$LFS install".to_string(), + "sed '/RTLDLIST=/s@/usr@@g' -i $LFS/usr/bin/ldd".to_string(), + "echo 'int main(){}' | $LFS_TGT-gcc -x c - -v -Wl,--verbose &> dummy.log".to_string(), + "readelf -l a.out | grep ': /lib'".to_string(), + "grep -E -o \"$LFS/lib.*/S?crt[1in].*succeeded\" dummy.log".to_string(), + "grep -B3 \"^ $LFS/usr/include\" dummy.log".to_string(), + "grep 'SEARCH.*/usr/lib' dummy.log |sed 's|; |\\n|g'".to_string(), + "grep \"/lib.*/libc.so.6 \" dummy.log".to_string(), + "grep found dummy.log".to_string(), + "rm -v a.out dummy.log".to_string(), + "make clean".to_string(), + "find .. -name \"*.a\" -delete".to_string(), + "CC=\"$LFS_TGT-gcc -m32\" \\".to_string(), + "CXX=\"$LFS_TGT-g++ -m32\" \\".to_string(), + "../configure \\".to_string(), + "--prefix=/usr \\".to_string(), + "--host=$LFS_TGT32 \\".to_string(), + "--build=$(../scripts/config.guess) \\".to_string(), + "--disable-nscd \\".to_string(), + "--with-headers=$LFS/usr/include \\".to_string(), + "--libdir=/usr/lib32 \\".to_string(), + "--libexecdir=/usr/lib32 \\".to_string(), + "libc_cv_slibdir=/usr/lib32 \\".to_string(), + "--enable-kernel=5.4".to_string(), + "make".to_string(), + "make DESTDIR=$PWD/DESTDIR install".to_string(), + "cp -a DESTDIR/usr/lib32 $LFS/usr/".to_string(), + "install -vm644 DESTDIR/usr/include/gnu/{lib-names,stubs}-32.h \\".to_string(), + "$LFS/usr/include/gnu/".to_string(), + "ln -svf ../lib32/ld-linux.so.2 $LFS/lib/ld-linux.so.2".to_string(), + "echo 'int main(){}' > dummy.c".to_string(), + "$LFS_TGT-gcc -m32 dummy.c".to_string(), + "readelf -l a.out | grep '/ld-linux'".to_string(), + "rm -v dummy.c a.out".to_string(), + ]; + pkg.install_commands = Vec::new(); + pkg.dependencies = Vec::new(); + let profdata = None; + let profdata_clone = profdata.clone(); + pkg.optimizations = match profdata_clone { + Some(path) => OptimizationSettings::for_pgo_replay(path), + None => OptimizationSettings::default(), + }; + pkg.optimizations.enable_lto = true; + pkg.optimizations.enable_pgo = true; + pkg.optimizations.cflags = vec!["-O3".to_string(), "-flto".to_string()]; + pkg.optimizations.ldflags = vec!["-flto".to_string()]; + pkg.optimizations.profdata = profdata; + pkg +} diff --git a/src/pkgs/by_name/gl/mod.rs b/src/pkgs/by_name/gl/mod.rs new file mode 100644 index 0000000..bfcf232 --- /dev/null +++ b/src/pkgs/by_name/gl/mod.rs @@ -0,0 +1 @@ +pub mod glibc; diff --git a/src/pkgs/by_name/li/linux/mod.rs b/src/pkgs/by_name/li/linux/mod.rs new file mode 100644 index 0000000..48d1d48 --- /dev/null +++ b/src/pkgs/by_name/li/linux/mod.rs @@ -0,0 +1,30 @@ +// MLFS metadata: stage: cross-toolchain + +use crate::pkgs::package::{OptimizationSettings, PackageDefinition}; + +pub fn definition() -> PackageDefinition { + let mut pkg = PackageDefinition::new("Linux", "6.16.9 API Headers"); + pkg.source = None; + pkg.md5 = None; + pkg.configure_args = Vec::new(); + pkg.build_commands = vec![ + "make mrproper".to_string(), + "make headers".to_string(), + "find 
usr/include -type f ! -name '*.h' -delete".to_string(), + "cp -rv usr/include $LFS/usr".to_string(), + ]; + pkg.install_commands = Vec::new(); + pkg.dependencies = Vec::new(); + let profdata = None; + let profdata_clone = profdata.clone(); + pkg.optimizations = match profdata_clone { + Some(path) => OptimizationSettings::for_pgo_replay(path), + None => OptimizationSettings::default(), + }; + pkg.optimizations.enable_lto = true; + pkg.optimizations.enable_pgo = true; + pkg.optimizations.cflags = vec!["-O3".to_string(), "-flto".to_string()]; + pkg.optimizations.ldflags = vec!["-flto".to_string()]; + pkg.optimizations.profdata = profdata; + pkg +} diff --git a/src/pkgs/by_name/li/mod.rs b/src/pkgs/by_name/li/mod.rs new file mode 100644 index 0000000..057cec9 --- /dev/null +++ b/src/pkgs/by_name/li/mod.rs @@ -0,0 +1 @@ +pub mod linux; diff --git a/src/pkgs/by_name/mod.rs b/src/pkgs/by_name/mod.rs index 517c3a3..7a7244f 100644 --- a/src/pkgs/by_name/mod.rs +++ b/src/pkgs/by_name/mod.rs @@ -1 +1,4 @@ pub mod bi; +pub mod gc; +pub mod gl; +pub mod li;
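
The four cross-toolchain definitions added above are only exported through the new by_name modules; nothing in this patch consumes them yet. As a minimal sketch (assuming a hypothetical cross_toolchain_definitions() helper that is not part of this patch), a caller could collect them in build order like this:

    // Sketch only: gathers the definition() functions introduced in this
    // patch. The helper name and its placement are assumptions; only the
    // module paths and PackageDefinition come from the patch itself.
    use crate::pkgs::by_name::{bi, gc, gl, li};
    use crate::pkgs::package::PackageDefinition;

    pub fn cross_toolchain_definitions() -> Vec<PackageDefinition> {
        vec![
            bi::binutils_pass_1::definition(), // Binutils Pass 1
            gc::gcc_pass_1::definition(),      // GCC Pass 1 (section 5.3)
            li::linux::definition(),           // Linux API Headers (section 5.4)
            gl::glibc::definition(),           // Glibc (section 5.5)
        ]
    }

The ordering follows the MLFS chapter 5 sequence recorded in the metadata files (gcc-pass-1.json is section 5.3, linux-headers.json 5.4, glibc.json 5.5), which is also the order in which these packages have to be built.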