diff --git a/Cargo.lock b/Cargo.lock index 5e59b37..5331705 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,6 +26,7 @@ dependencies = [ "cfg-if", "getrandom 0.3.3", "once_cell", + "serde", "version_check", "zerocopy", ] @@ -40,10 +41,13 @@ dependencies = [ ] [[package]] -name = "allocator-api2" -version = "0.2.21" +name = "android_system_properties" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] [[package]] name = "anstream" @@ -128,6 +132,12 @@ dependencies = [ "windows-link 0.2.0", ] +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + [[package]] name = "base64" version = "0.22.1" @@ -154,6 +164,21 @@ dependencies = [ "virtue", ] +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + [[package]] name = "bitflags" version = "1.3.2" @@ -181,6 +206,12 @@ version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +[[package]] +name = "bytecount" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "175812e0be2bccb6abe50bb8d566126198344f707e304f45c648fd8f2cc0365e" + [[package]] name = "byteorder" version = "1.5.0" @@ -199,15 +230,6 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df8670b8c7b9dae1793364eafadf7239c40d669904660c5960d74cfd80b46a53" -[[package]] -name = "castaway" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a" -dependencies = [ - "rustversion", -] - [[package]] name = "cc" version = "1.2.39" @@ -230,6 +252,17 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chrono" +version = "0.4.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2" +dependencies = [ + "iana-time-zone", + "num-traits", + "windows-link 0.2.0", +] + [[package]] name = "clap" version = "4.5.48" @@ -276,20 +309,6 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" -[[package]] -name = "compact_str" -version = "0.8.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b79c4069c6cad78e2e0cdfcbd26275770669fb39fd308a752dc110e83b9af32" -dependencies = [ - "castaway", - "cfg-if", - "itoa", - "rustversion", - "ryu", - "static_assertions", -] - [[package]] name = "console" version = "0.16.1" @@ -383,22 +402,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "crossterm" -version = "0.28.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "829d955a0bb380ef178a640b91779e3987da38c9aea133b20614cfed8cdea9c6" -dependencies = [ - "bitflags 2.9.4", - "crossterm_winapi", - "mio 1.0.4", - "parking_lot", - "rustix 0.38.44", - "signal-hook", - "signal-hook-mio", - "winapi", -] - [[package]] name = "crossterm" version = "0.29.0" @@ -411,7 +414,7 @@ dependencies = [ "document-features", "mio 1.0.4", "parking_lot", - "rustix 1.1.2", + "rustix", "signal-hook", "signal-hook-mio", "winapi", @@ -461,9 +464,9 @@ dependencies = [ [[package]] name = "darling" -version = "0.20.11" +version = "0.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" dependencies = [ "darling_core", "darling_macro", @@ -471,9 +474,9 @@ dependencies = [ [[package]] name = "darling_core" -version = "0.20.11" +version = "0.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" dependencies = [ "fnv", "ident_case", @@ -485,9 +488,9 @@ dependencies = [ [[package]] name = "darling_macro" -version = "0.20.11" +version = "0.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" dependencies = [ "darling_core", "quote", @@ -547,6 +550,42 @@ dependencies = [ "zeroize", ] +[[package]] +name = "diesel" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8496eeb328dce26ee9d9b73275d396d9bddb433fa30106cf6056dd8c3c2764c" +dependencies = [ + "diesel_derives", + "downcast-rs", + "libsqlite3-sys", + "r2d2", + "sqlite-wasm-rs", + "time", +] + +[[package]] +name = "diesel_derives" +version = "2.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09af0e983035368439f1383011cd87c46f41da81d0f21dc3727e2857d5a43c8e" +dependencies = [ + "diesel_table_macro_syntax", + "dsl_auto_type", + "proc-macro2", + "quote", + "syn 2.0.106", +] + +[[package]] +name = "diesel_table_macro_syntax" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe2444076b48641147115697648dc743c2c00b61adade0f01ce67133c7babe8c" +dependencies = [ + "syn 2.0.106", +] + [[package]] name = "digest" version = "0.10.7" @@ -583,6 +622,26 @@ dependencies = [ "litrs", ] +[[package]] +name = "downcast-rs" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "117240f60069e65410b3ae1bb213295bd828f707b5bec6596a1afc8793ce0cbc" + +[[package]] +name = "dsl_auto_type" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd122633e4bef06db27737f21d3738fb89c8f6d5360d6d9d7635dda142a7757e" +dependencies = [ + "darling", + "either", + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "dtoa" version = "1.0.10" @@ -647,6 +706,16 @@ dependencies = [ "windows-sys 0.61.1", ] +[[package]] +name = "fancy-regex" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b95f7c0680e4142284cf8b22c14a476e87d61b004a3a0861872b32ef7ead40a2" +dependencies = [ + "bit-set", + "regex", +] + [[package]] 
name = "fastrand" version = "2.3.0" @@ -665,12 +734,6 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" -[[package]] -name = "foldhash" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" - [[package]] name = "foreign-types" version = "0.3.2" @@ -695,6 +758,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "fraction" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3027ae1df8d41b4bed2241c8fdad4acc1e7af60c8e17743534b545e77182d678" +dependencies = [ + "lazy_static", + "num", +] + [[package]] name = "futf" version = "0.1.5" @@ -799,8 +872,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" dependencies = [ "cfg-if", + "js-sys", "libc", "wasi 0.11.1+wasi-snapshot-preview1", + "wasm-bindgen", ] [[package]] @@ -834,6 +909,25 @@ dependencies = [ "thiserror 2.0.17", ] +[[package]] +name = "h2" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "h2" version = "0.4.12" @@ -845,7 +939,7 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http", + "http 1.3.1", "indexmap", "slab", "tokio", @@ -853,17 +947,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "hashbrown" -version = "0.15.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" -dependencies = [ - "allocator-api2", - "equivalent", - "foldhash", -] - [[package]] name = "hashbrown" version = "0.16.0" @@ -923,6 +1006,17 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "http" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] name = "http" version = "1.3.1" @@ -934,6 +1028,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + [[package]] name = "http-body" version = "1.0.1" @@ -941,7 +1046,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http", + "http 1.3.1", ] [[package]] @@ -952,8 +1057,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http", - "http-body", + "http 1.3.1", + "http-body 1.0.1", "pin-project-lite", ] @@ -963,6 +1068,36 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + [[package]] name = "hyper" version = "1.7.0" @@ -973,9 +1108,9 @@ dependencies = [ "bytes", "futures-channel", "futures-core", - "h2", - "http", - "http-body", + "h2 0.4.12", + "http 1.3.1", + "http-body 1.0.1", "httparse", "itoa", "pin-project-lite", @@ -991,8 +1126,8 @@ version = "0.27.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ - "http", - "hyper", + "http 1.3.1", + "hyper 1.7.0", "hyper-util", "rustls", "rustls-pki-types", @@ -1009,7 +1144,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", "http-body-util", - "hyper", + "hyper 1.7.0", "hyper-util", "native-tls", "tokio", @@ -1023,26 +1158,50 @@ version = "0.1.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c6995591a8f1380fcb4ba966a252a4b29188d51d2b89e3a252f5305be65aea8" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "futures-channel", "futures-core", "futures-util", - "http", - "http-body", - "hyper", + "http 1.3.1", + "http-body 1.0.1", + "hyper 1.7.0", "ipnet", "libc", "percent-encoding", "pin-project-lite", - "socket2", - "system-configuration", + "socket2 0.6.0", + "system-configuration 0.6.1", "tokio", "tower-service", "tracing", "windows-registry", ] +[[package]] +name = "iana-time-zone" +version = "0.1.64" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "log", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "icu_collections" version = "2.0.0" @@ -1163,7 +1322,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b0f83760fb341a774ed326568e19f5a863af4a952def8c39f9ab92fd95b88e5" dependencies = [ "equivalent", - "hashbrown 0.16.0", + "hashbrown", ] [[package]] @@ -1179,12 +1338,6 @@ dependencies = [ "web-time", ] -[[package]] -name = "indoc" -version = "2.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4c7245a08504955605670dbf141fceab975f15ca21570696aebe9d2e71576bd" - [[package]] name = "inquire" version = "0.9.1" @@ -1199,19 +1352,6 @@ dependencies = [ "unicode-width 0.2.0", ] -[[package]] -name = "instability" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "435d80800b936787d62688c927b6490e887c7ef5ff9ce922c6c6050fca75eb9a" -dependencies = [ - "darling", - "indoc", - "proc-macro2", - "quote", - "syn 2.0.106", -] - [[package]] name = "io-uring" version = "0.7.10" @@ -1246,12 +1386,12 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" [[package]] -name = "itertools" -version = "0.13.0" +name = "iso8601" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +checksum = "e1082f0c48f143442a1ac6122f67e360ceee130b967af4d50996e5154a45df46" dependencies = [ - "either", + "nom", ] [[package]] @@ -1270,6 +1410,36 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "jsonschema" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a071f4f7efc9a9118dfb627a0a94ef247986e1ab8606a4c806ae2b3aa3b6978" +dependencies = [ + "ahash", + "anyhow", + "base64 0.21.7", + "bytecount", + "clap", + "fancy-regex", + "fraction", + "getrandom 0.2.16", + "iso8601", + "itoa", + "memchr", + "num-cmp", + "once_cell", + "parking_lot", + "percent-encoding", + "regex", + "reqwest 0.11.27", + "serde", + "serde_json", + "time", + "url", + "uuid", +] + [[package]] name = "lazy_static" version = "1.5.0" @@ -1283,10 +1453,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "58f929b4d672ea937a23a1ab494143d968337a5f47e56d0815df1e0890ddf174" [[package]] -name = "linux-raw-sys" -version = "0.4.15" +name = "libsqlite3-sys" +version = "0.35.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" +checksum = "133c182a6a2c87864fe97778797e46c7e999672690dc9fa3ee8e241aa4a9c13f" +dependencies = [ + "pkg-config", + "vcpkg", +] [[package]] name = "linux-raw-sys" @@ -1322,15 +1496,6 @@ version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" -[[package]] -name = "lru" -version = "0.12.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" -dependencies = [ - "hashbrown 0.15.5", -] - [[package]] name = "mac" version = "0.1.1" @@ -1452,6 +1617,15 @@ dependencies = [ "libc", ] +[[package]] +name = "nom" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" +dependencies = [ + "memchr", +] + [[package]] name = "nu-ansi-term" version = "0.50.1" @@ -1461,12 +1635,91 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "num" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-cmp" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63335b2e2c34fae2fb0aa2cecfd9f0832a1e24b3b32ecec612c3426d46dc8aaa" + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + 
"num-traits", +] + [[package]] name = "num-conv" version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" +dependencies = [ + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "num_cpus" version = "1.17.0" @@ -1547,25 +1800,28 @@ name = "package_management" version = "0.1.0" dependencies = [ "anyhow", + "chrono", "clap", "console", "crossterm 0.29.0", "dialoguer", + "diesel", "gptman", "hex", "html_parser", "indicatif", "inquire", + "jsonschema", "md5", "num_cpus", "rand 0.9.2", - "ratatui", "regex", - "reqwest", + "reqwest 0.12.23", "scraper", "semver", "serde", "serde_json", + "sha2", "shell-words", "spinners", "tokio", @@ -1575,6 +1831,7 @@ dependencies = [ "tui", "url", "uuid", + "walkdir", ] [[package]] @@ -1600,12 +1857,6 @@ dependencies = [ "windows-targets 0.52.6", ] -[[package]] -name = "paste" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" - [[package]] name = "percent-encoding" version = "2.3.2" @@ -1824,6 +2075,17 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "r2d2" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51de85fb3fb6524929c8a2eb85e6b6d363de4e8c48f9e2c2eac4944abc181c93" +dependencies = [ + "log", + "parking_lot", + "scheduled-thread-pool", +] + [[package]] name = "rand" version = "0.8.5" @@ -1883,27 +2145,6 @@ dependencies = [ "getrandom 0.3.3", ] -[[package]] -name = "ratatui" -version = "0.29.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabd94c2f37801c20583fc49dd5cd6b0ba68c716787c2dd6ed18571e1e63117b" -dependencies = [ - "bitflags 2.9.4", - "cassowary", - "compact_str", - "crossterm 0.28.1", - "indoc", - "instability", - "itertools", - "lru", - "paste", - "strum 0.26.3", - "unicode-segmentation", - "unicode-truncate", - "unicode-width 0.2.0", -] - [[package]] name = "redox_syscall" version = "0.5.17" @@ -1942,23 +2183,59 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" +[[package]] +name = "reqwest" +version = "0.11.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62" 
+dependencies = [ + "base64 0.21.7", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "hyper 0.14.32", + "ipnet", + "js-sys", + "log", + "mime", + "once_cell", + "percent-encoding", + "pin-project-lite", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper 0.1.2", + "system-configuration 0.5.1", + "tokio", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "winreg", +] + [[package]] name = "reqwest" version = "0.12.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d429f34c8092b2d42c7c93cec323bb4adeb7c67698f70839adec842ec10c7ceb" dependencies = [ - "base64", + "base64 0.22.1", "bytes", "encoding_rs", "futures-channel", "futures-core", "futures-util", - "h2", - "http", - "http-body", + "h2 0.4.12", + "http 1.3.1", + "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.7.0", "hyper-rustls", "hyper-tls", "hyper-util", @@ -1972,7 +2249,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", - "sync_wrapper", + "sync_wrapper 1.0.2", "tokio", "tokio-native-tls", "tower", @@ -2004,19 +2281,6 @@ version = "0.1.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" -[[package]] -name = "rustix" -version = "0.38.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" -dependencies = [ - "bitflags 2.9.4", - "errno", - "libc", - "linux-raw-sys 0.4.15", - "windows-sys 0.59.0", -] - [[package]] name = "rustix" version = "1.1.2" @@ -2026,7 +2290,7 @@ dependencies = [ "bitflags 2.9.4", "errno", "libc", - "linux-raw-sys 0.11.0", + "linux-raw-sys", "windows-sys 0.61.1", ] @@ -2075,6 +2339,15 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "schannel" version = "0.1.28" @@ -2084,6 +2357,15 @@ dependencies = [ "windows-sys 0.61.1", ] +[[package]] +name = "scheduled-thread-pool" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3cbc66816425a074528352f5789333ecff06ca41b36b0b0efdfbb29edc391a19" +dependencies = [ + "parking_lot", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -2305,6 +2587,16 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "socket2" version = "0.6.0" @@ -2323,7 +2615,22 @@ checksum = "a0ef947f358b9c238923f764c72a4a9d42f2d637c46e059dbd319d6e7cfb4f82" dependencies = [ "lazy_static", "maplit", - "strum 0.24.1", + "strum", +] + +[[package]] +name = "sqlite-wasm-rs" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9aead1c279716985b981b7940ef9b652d3f93d70a7296853c633b7ce8fa8088a" +dependencies = [ + "js-sys", 
+ "once_cell", + "thiserror 2.0.17", + "tokio", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", ] [[package]] @@ -2332,12 +2639,6 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" -[[package]] -name = "static_assertions" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" - [[package]] name = "string_cache" version = "0.8.9" @@ -2375,16 +2676,7 @@ version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "063e6045c0e62079840579a7e47a355ae92f60eb74daaf156fb1e84ba164e63f" dependencies = [ - "strum_macros 0.24.3", -] - -[[package]] -name = "strum" -version = "0.26.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" -dependencies = [ - "strum_macros 0.26.4", + "strum_macros", ] [[package]] @@ -2400,19 +2692,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "strum_macros" -version = "0.26.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" -dependencies = [ - "heck 0.5.0", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.106", -] - [[package]] name = "subtle" version = "2.6.1" @@ -2441,6 +2720,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sync_wrapper" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" + [[package]] name = "sync_wrapper" version = "1.0.2" @@ -2461,6 +2746,17 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "system-configuration" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "system-configuration-sys 0.5.0", +] + [[package]] name = "system-configuration" version = "0.6.1" @@ -2469,7 +2765,17 @@ checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ "bitflags 2.9.4", "core-foundation", - "system-configuration-sys", + "system-configuration-sys 0.6.0", +] + +[[package]] +name = "system-configuration-sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9" +dependencies = [ + "core-foundation-sys", + "libc", ] [[package]] @@ -2491,7 +2797,7 @@ dependencies = [ "fastrand", "getrandom 0.3.3", "once_cell", - "rustix 1.1.2", + "rustix", "windows-sys 0.61.1", ] @@ -2611,7 +2917,7 @@ dependencies = [ "pin-project-lite", "signal-hook-registry", "slab", - "socket2", + "socket2 0.6.0", "tokio-macros", "windows-sys 0.59.0", ] @@ -2669,7 +2975,7 @@ dependencies = [ "futures-core", "futures-util", "pin-project-lite", - "sync_wrapper", + "sync_wrapper 1.0.2", "tokio", "tower-layer", "tower-service", @@ -2684,8 +2990,8 @@ dependencies = [ "bitflags 2.9.4", "bytes", "futures-util", - "http", - "http-body", + "http 1.3.1", + "http-body 1.0.1", "iri-string", "pin-project-lite", "tower", @@ -2821,17 +3127,6 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" -[[package]] -name = "unicode-truncate" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3644627a5af5fa321c95b9b235a72fd24cd29c648c2c379431e6628655627bf" -dependencies = [ - "itertools", - "unicode-segmentation", - "unicode-width 0.1.14", -] - [[package]] name = "unicode-width" version = "0.1.14" @@ -2927,6 +3222,16 @@ version = "0.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1" +[[package]] +name = "walkdir" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" +dependencies = [ + "same-file", + "winapi-util", +] + [[package]] name = "want" version = "0.3.1" @@ -3068,12 +3373,56 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys 0.61.1", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-core" +version = "0.62.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6844ee5416b285084d3d3fffd743b925a6c9385455f64f6d4fa3031c4c2749a9" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link 0.2.0", + "windows-result 0.4.0", + "windows-strings 0.5.0", +] + +[[package]] +name = "windows-implement" +version = "0.60.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edb307e42a74fb6de9bf3a02d9712678b22399c87e6fa869d6dfcd8c1b7754e0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + +[[package]] +name = "windows-interface" +version = "0.59.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0abd1ddbc6964ac14db11c7213d6532ef34bd9aa042c2e5935f59d7908b46a5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "windows-link" version = "0.1.3" @@ -3093,8 +3442,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b8a9ed28765efc97bbc954883f4e6796c33a06546ebafacbabee9696967499e" dependencies = [ "windows-link 0.1.3", - "windows-result", - "windows-strings", + "windows-result 0.3.4", + "windows-strings 0.4.2", ] [[package]] @@ -3106,6 +3455,15 @@ dependencies = [ "windows-link 0.1.3", ] +[[package]] +name = "windows-result" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7084dcc306f89883455a206237404d3eaf961e5bd7e0f312f7c91f57eb44167f" +dependencies = [ + "windows-link 0.2.0", +] + [[package]] name = "windows-strings" version = "0.4.2" @@ -3115,6 +3473,15 @@ dependencies = [ "windows-link 0.1.3", ] +[[package]] +name = "windows-strings" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7218c655a553b0bed4426cf54b20d7ba363ef543b52d515b3e48d7fd55318dda" +dependencies = [ + "windows-link 0.2.0", +] + [[package]] name = "windows-sys" version = "0.48.0" @@ -3346,6 +3713,16 @@ 
version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" +[[package]] +name = "winreg" +version = "0.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + [[package]] name = "wit-bindgen" version = "0.46.0" diff --git a/Cargo.toml b/Cargo.toml index 43e0f04..a6bc3b5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,7 @@ console = "0.16.1" # Optional Terminal UI crossterm = { version = "0.29.0", optional = true } -ratatui = { version = "0.29.0", optional = true } +tui = { version = "0.19.0", optional = true } # Parsing & scraping html_parser = "0.7.0" @@ -22,6 +22,10 @@ scraper = "0.19.0" regex = "1.11.3" serde = { version = "1.0.228", features = ["derive"] } serde_json = "1.0.145" +jsonschema = "0.17.0" +walkdir = "2.5.0" +chrono = { version = "0.4.38", default-features = false, features = ["clock"] } +sha2 = "0.10.8" # Utilities indicatif = "0.18.0" @@ -34,7 +38,6 @@ md5 = "0.8.0" reqwest = { version = "0.12.23", features = ["blocking", "json"] } semver = "1.0.27" inquire = "0.9.1" -tui = "0.19.0" tracing = "0.1.41" tracing-appender = "0.2.3" tracing-subscriber = { version = "0.3.20", features = ["env-filter", "fmt"] } @@ -45,14 +48,15 @@ shell-words = "1.1.0" url = "2.5.7" uuid = { version = "1.18.1", features = ["v4"] } hex = "0.4.3" +diesel = { version = "2.1.6", features = ["sqlite", "r2d2", "returning_clauses_for_sqlite_3_35"] } + [features] # TUI feature flag -tui = ["ratatui", "crossterm"] +tui = ["dep:tui", "dep:crossterm"] # Optional default features default = [] -crossterm = ["dep:crossterm"] # ----------------------- # Cargo-make tasks diff --git a/README.md b/README.md index 75fa96c..83de895 100644 --- a/README.md +++ b/README.md @@ -94,10 +94,43 @@ You can also run the project directly in the flake shell: nix run ``` +### AI metadata tooling + +The AI metadata store under `ai/metadata/` comes with a helper CLI to +validate package records against the JSON schema and regenerate +`index.json` after adding new entries: + +```bash +cargo run --bin metadata_indexer -- --base-dir . validate +cargo run --bin metadata_indexer -- --base-dir . index +``` + +Use `--compact` with `index` if you prefer single-line JSON output. + +To draft metadata for a specific book page, you can run the harvest mode. +It fetches the XHTML, scrapes the build commands, and emits a schema- +compliant JSON skeleton (pass `--dry-run` to inspect the result without +writing to disk): + +```bash +cargo run --bin metadata_indexer -- \ + --base-dir . harvest \ + --book mlfs \ + --page chapter05/binutils-pass1 \ + --dry-run +``` + +## 📚 Documentation + +- [Architecture Overview](docs/ARCHITECTURE.md) – high-level tour of the crate + layout, binaries, and supporting modules. +- [Metadata Harvesting Pipeline](docs/METADATA_PIPELINE.md) – how the metadata + indexer produces and validates the JSON records under `ai/metadata/`. +- `ai/notes.md` – scratchpad for ongoing research tasks (e.g., deeper jhalfs + integration). + --- ## 📄 License LPKG is licensed under the [MIT License](LICENSE). 
- - diff --git a/ai/bugs.json b/ai/bugs.json new file mode 100644 index 0000000..c5c770a --- /dev/null +++ b/ai/bugs.json @@ -0,0 +1,29 @@ +[ + { + "id": "disk-manager-private-fields", + "title": "Disk manager accesses private GPT fields", + "description": "src/tui/disk_manager.rs reaches into gptman::GPT::partitions which is private, breaking compilation.", + "status": "open", + "owner": "default_cli", + "created_at": "2025-03-09T00:00:00Z", + "labels": ["tui", "blocking-build"] + }, + { + "id": "tui-feature-build", + "title": "TUI modules require crossterm feature gating", + "description": "When the crate is built without the `tui` feature the main binary still imports crossterm types and fails to compile.", + "status": "open", + "owner": "default_cli", + "created_at": "2025-03-09T00:00:00Z", + "labels": ["tui", "feature-gate"] + }, + { + "id": "metadata-harvest-no-source-urls", + "title": "Harvested metadata missing source URLs", + "description": "`metadata_indexer harvest --book mlfs --page chapter05/binutils-pass1 --dry-run` emits a draft record with no source URLs even after wget-list fallback; Binutils tarball discovery logic needs to inspect package download tables or improve slug matching.", + "status": "open", + "owner": "default_cli", + "created_at": "2025-10-01T04:40:00Z", + "labels": ["metadata", "ingest", "mlfs"] + } +] diff --git a/ai/metadata/cache/mlfs-md5sums.txt b/ai/metadata/cache/mlfs-md5sums.txt new file mode 100644 index 0000000..44d79f3 --- /dev/null +++ b/ai/metadata/cache/mlfs-md5sums.txt @@ -0,0 +1,97 @@ +590765dee95907dbc3c856f7255bd669 acl-2.3.2.tar.xz +227043ec2f6ca03c0948df5517f9c927 attr-2.5.2.tar.gz +1be79f7106ab6767f18391c5e22be701 autoconf-2.72.tar.xz +cea31dbf1120f890cbf2a3032cfb9a68 automake-1.18.1.tar.xz +977c8c0c5ae6309191e7768e28ebc951 bash-5.3.tar.gz +ad4db5a0eb4fdbb3f6813be4b6b3da74 bc-7.0.3.tar.xz +dee5b4267e0305a99a3c9d6131f45759 binutils-2.45.tar.xz +c28f119f405a2304ff0a7ccdcc629713 bison-3.8.2.tar.xz +67e051268d0c475ea773822f7500d0e5 bzip2-1.0.8.tar.gz +b2e687b6e664b9dd76581836c5c3e782 coreutils-9.8.tar.xz +68c5208c58236eba447d7d6d1326b821 dejagnu-1.6.3.tar.gz +d1b18b20868fb561f77861cd90b05de4 diffutils-3.12.tar.xz +113d7a7ee0710d2a670a44692a35fd2e e2fsprogs-1.47.3.tar.gz +ceefa052ded950a4c523688799193a44 elfutils-0.193.tar.bz2 +423975a2a775ff32f12c53635b463a91 expat-2.7.3.tar.xz +00fce8de158422f5ccd2666512329bd2 expect5.45.4.tar.gz +459da2d4b534801e2e2861611d823864 file-5.46.tar.gz +870cfd71c07d37ebe56f9f4aaf4ad872 findutils-4.10.0.tar.xz +2882e3179748cc9f9c23ec593d6adc8d flex-2.6.4.tar.gz +c538415c1f27bd69cbbbf3cdd5135d39 flit_core-3.12.0.tar.gz +b7014650c5f45e5d4837c31209dc0037 gawk-5.3.2.tar.xz +b861b092bf1af683c46a8aa2e689a6fd gcc-15.2.0.tar.xz +aaa600665bc89e2febb3c7bd90679115 gdbm-1.26.tar.gz +8e14e926f088e292f5f2bce95b81d10e gettext-0.26.tar.xz +23c6f5a27932b435cae94e087cb8b1f5 glibc-2.42.tar.xz +956dc04e864001a9c22429f761f2c283 gmp-6.3.0.tar.xz +31753b021ea78a21f154bf9eecb8b079 gperf-3.3.tar.gz +5d9301ed9d209c4a88c8d3a6fd08b9ac grep-3.12.tar.xz +5e4f40315a22bb8a158748e7d5094c7d groff-1.23.0.tar.gz +60c564b1bdc39d8e43b3aab4bc0fb140 grub-2.12.tar.xz +4bf5a10f287501ee8e8ebe00ef62b2c2 gzip-1.14.tar.xz +437a3e9f4a420244c90db4ab20e713b6 iana-etc-20250926.tar.gz +401d7d07682a193960bcdecafd03de94 inetutils-2.6.tar.xz +12e517cac2b57a0121cda351570f1e63 intltool-0.51.0.tar.gz +80e1f91bf59d572acc15d5c6eb4f3e7c iproute2-6.16.0.tar.xz +11ee9d335b227ea2e8579c4ba6e56138 isl-0.27.tar.xz +66d4c25ff43d1deaf9637ccda523dec8 jinja2-3.1.6.tar.gz 
+7be7c6f658f5fb9512e2c490349a8eeb kbd-2.9.0.tar.xz +36f2cc483745e81ede3406fa55e1065a kmod-34.2.tar.xz +0386dc14f6a081a94dfb4c2413864eed less-679.tar.gz +2be34eced7c861fea8894e7195dac636 lfs-bootscripts-20250827.tar.xz +449ade7d620b5c4eeb15a632fbaa4f74 libcap-2.76.tar.xz +92af9efad4ba398995abf44835c5d9e9 libffi-3.5.2.tar.gz +17ac6969b2015386bcb5d278a08a40b5 libpipeline-1.5.8.tar.gz +22e0a29df8af5fdde276ea3a7d351d30 libtool-2.5.4.tar.xz +1796a5d20098e9dd9e3f576803c83000 libxcrypt-4.4.38.tar.xz +feb0a3d5ecf5a4628aed7d9f8f7ab3f6 linux-6.16.9.tar.xz +dead9f5f1966d9ae56e1e32761e4e675 lz4-1.10.0.tar.gz +6eb2ebed5b24e74b6e890919331d2132 m4-1.4.20.tar.xz +c8469a3713cbbe04d955d4ae4be23eeb make-4.4.1.tar.gz +b6335533cbeac3b24cd7be31fdee8c83 man-db-2.13.1.tar.xz +16f68d70139dd2bbcae4102be4705753 man-pages-6.15.tar.xz +13a73126d25afa72a1ff0daed072f5fe markupsafe-3.0.3.tar.gz +19e0a1091cec23d369dd77d852844195 meson-1.9.1.tar.gz +5c9bc658c9fd0f940e8e3e0f09530c62 mpc-1.3.1.tar.gz +7c32c39b8b6e3ae85f25156228156061 mpfr-4.2.2.tar.xz +679987405412f970561cc85e1e6428a2 ncurses-6.5-20250809.tgz +c35f8f55f4cf60f1a916068d8f45a0f8 ninja-1.13.1.tar.gz +0ec20faeb96bbb203c8684cc7fe4432e openssl-3.5.3.tar.gz +ab0ef21ddebe09d1803575120d3f99f8 packaging-25.0.tar.gz +149327a021d41c8f88d034eab41c039f patch-2.8.tar.xz +641f99b635ebb9332a9b6a8ce8e2f3cf pcre2-10.46.tar.bz2 +7a6950a9f12d01eb96a9d2ed2f4e0072 perl-5.42.0.tar.xz +3291128c917fdb8fccd8c9e7784b643b pkgconf-2.5.1.tar.xz +90803e64f51f192f3325d25c3335d057 procps-ng-4.0.5.tar.xz +53eae841735189a896d614cba440eb10 psmisc-23.7.tar.xz +256cdb3bbf45cdce7499e52ba6c36ea3 Python-3.13.7.tar.xz +b84c0d81b2758398bb7f5b7411d3d908 python-3.13.7-docs-html.tar.bz2 +25a73bfb2a3ad7146c5e9d4408d9f6cd readline-8.3.tar.gz +6aac9b2dbafcd5b7a67a8a9bcb8036c3 sed-4.9.tar.xz +82e1d67883b713f9493659b50d13b436 setuptools-80.9.0.tar.gz +30ef46f54363db1d624587be68794ef2 shadow-4.18.0.tar.xz +d74bbdca4ab1b2bd46d3b3f8dbb0f3db sqlite-autoconf-3500400.tar.gz +63a62af5b35913459954e6e66876f2b8 sqlite-doc-3500400.tar.xz +af60786956a2dc84054fbf46652e515e sysklogd-2.7.2.tar.gz +25fe5d328e22641254761f1baa74cee0 systemd-257.8.tar.gz +a44063e2ec0cf4adfd2ed5c9e9e095c5 systemd-man-pages-257.8.tar.xz +bc6890b975d19dc9db42d0c7364dd092 sysvinit-3.14.tar.xz +a2d8042658cfd8ea939e6d911eaf4152 tar-1.35.tar.xz +1ec3444533f54d0f86cd120058e15e48 tcl8.6.17-src.tar.gz +60c71044e723b0db5f21be82929f3534 tcl8.6.17-html.tar.gz +11939a7624572814912a18e76c8d8972 texinfo-7.2.tar.xz +ad65154c48c74a9b311fe84778c5434f tzdata2025b.tar.gz +acd4360d8a5c3ef320b9db88d275dae6 udev-lfs-20230818.tar.xz +a2a3281ce76821c4bc28794fdf9d3994 util-linux-2.41.2.tar.xz +e72f31be182f1ccf4b66bef46ac1e60e vim-9.1.1806.tar.gz +65e09ee84af36821e3b1e9564aa91bd5 wheel-0.46.1.tar.gz +89a8e82cfd2ad948b349c0a69c494463 XML-Parser-2.47.tar.gz +cf5e1feb023d22c6bdaa30e84ef3abe3 xz-5.8.1.tar.xz +9855b6d802d7fe5b7bd5b196a2271655 zlib-1.3.1.tar.gz +780fc1896922b1bc52a4e90980cdda48 zstd-1.5.7.tar.gz +6a5ac7e89b791aae556de0f745916f7f bzip2-1.0.8-install_docs-1.patch +c800540039fb0707954197486b1bde70 coreutils-9.8-i18n-2.patch +0ca4d6bb8d572fbcdb13cb36cd34833e expect-5.45.4-gcc15-1.patch +9a5997c3452909b1769918c759eff8a2 glibc-2.42-fhs-1.patch +f75cca16a38da6caa7d52151f7136895 kbd-2.9.0-backspace-1.patch +3af8fd8e13cad481eeeaa48be4247445 sysvinit-3.14-consolidated-1.patch diff --git a/ai/metadata/cache/mlfs-wget-list.txt b/ai/metadata/cache/mlfs-wget-list.txt new file mode 100644 index 0000000..127aa84 --- /dev/null +++ b/ai/metadata/cache/mlfs-wget-list.txt @@ -0,0 
+1,97 @@ +https://download.savannah.gnu.org/releases/acl/acl-2.3.2.tar.xz +https://download.savannah.gnu.org/releases/attr/attr-2.5.2.tar.gz +https://ftp.gnu.org/gnu/autoconf/autoconf-2.72.tar.xz +https://ftp.gnu.org/gnu/automake/automake-1.18.1.tar.xz +https://ftp.gnu.org/gnu/bash/bash-5.3.tar.gz +https://github.com/gavinhoward/bc/releases/download/7.0.3/bc-7.0.3.tar.xz +https://sourceware.org/pub/binutils/releases/binutils-2.45.tar.xz +https://ftp.gnu.org/gnu/bison/bison-3.8.2.tar.xz +https://www.sourceware.org/pub/bzip2/bzip2-1.0.8.tar.gz +https://ftp.gnu.org/gnu/coreutils/coreutils-9.8.tar.xz +https://ftp.gnu.org/gnu/dejagnu/dejagnu-1.6.3.tar.gz +https://ftp.gnu.org/gnu/diffutils/diffutils-3.12.tar.xz +https://downloads.sourceforge.net/project/e2fsprogs/e2fsprogs/v1.47.3/e2fsprogs-1.47.3.tar.gz +https://sourceware.org/ftp/elfutils/0.193/elfutils-0.193.tar.bz2 +https://github.com/libexpat/libexpat/releases/download/R_2_7_3/expat-2.7.3.tar.xz +https://prdownloads.sourceforge.net/expect/expect5.45.4.tar.gz +https://astron.com/pub/file/file-5.46.tar.gz +https://ftp.gnu.org/gnu/findutils/findutils-4.10.0.tar.xz +https://github.com/westes/flex/releases/download/v2.6.4/flex-2.6.4.tar.gz +https://pypi.org/packages/source/f/flit-core/flit_core-3.12.0.tar.gz +https://ftp.gnu.org/gnu/gawk/gawk-5.3.2.tar.xz +https://ftp.gnu.org/gnu/gcc/gcc-15.2.0/gcc-15.2.0.tar.xz +https://ftp.gnu.org/gnu/gdbm/gdbm-1.26.tar.gz +https://ftp.gnu.org/gnu/gettext/gettext-0.26.tar.xz +https://ftp.gnu.org/gnu/glibc/glibc-2.42.tar.xz +https://ftp.gnu.org/gnu/gmp/gmp-6.3.0.tar.xz +https://ftp.gnu.org/gnu/gperf/gperf-3.3.tar.gz +https://ftp.gnu.org/gnu/grep/grep-3.12.tar.xz +https://ftp.gnu.org/gnu/groff/groff-1.23.0.tar.gz +https://ftp.gnu.org/gnu/grub/grub-2.12.tar.xz +https://ftp.gnu.org/gnu/gzip/gzip-1.14.tar.xz +https://github.com/Mic92/iana-etc/releases/download/20250926/iana-etc-20250926.tar.gz +https://ftp.gnu.org/gnu/inetutils/inetutils-2.6.tar.xz +https://launchpad.net/intltool/trunk/0.51.0/+download/intltool-0.51.0.tar.gz +https://www.kernel.org/pub/linux/utils/net/iproute2/iproute2-6.16.0.tar.xz +https://libisl.sourceforge.io/isl-0.27.tar.xz +https://pypi.org/packages/source/J/Jinja2/jinja2-3.1.6.tar.gz +https://www.kernel.org/pub/linux/utils/kbd/kbd-2.9.0.tar.xz +https://www.kernel.org/pub/linux/utils/kernel/kmod/kmod-34.2.tar.xz +https://www.greenwoodsoftware.com/less/less-679.tar.gz +https://www.linuxfromscratch.org/lfs/downloads/development/lfs-bootscripts-20250827.tar.xz +https://www.kernel.org/pub/linux/libs/security/linux-privs/libcap2/libcap-2.76.tar.xz +https://github.com/libffi/libffi/releases/download/v3.5.2/libffi-3.5.2.tar.gz +https://download.savannah.gnu.org/releases/libpipeline/libpipeline-1.5.8.tar.gz +https://ftp.gnu.org/gnu/libtool/libtool-2.5.4.tar.xz +https://github.com/besser82/libxcrypt/releases/download/v4.4.38/libxcrypt-4.4.38.tar.xz +https://www.kernel.org/pub/linux/kernel/v6.x/linux-6.16.9.tar.xz +https://github.com/lz4/lz4/releases/download/v1.10.0/lz4-1.10.0.tar.gz +https://ftp.gnu.org/gnu/m4/m4-1.4.20.tar.xz +https://ftp.gnu.org/gnu/make/make-4.4.1.tar.gz +https://download.savannah.gnu.org/releases/man-db/man-db-2.13.1.tar.xz +https://www.kernel.org/pub/linux/docs/man-pages/man-pages-6.15.tar.xz +https://pypi.org/packages/source/M/MarkupSafe/markupsafe-3.0.3.tar.gz +https://github.com/mesonbuild/meson/releases/download/1.9.1/meson-1.9.1.tar.gz +https://ftp.gnu.org/gnu/mpc/mpc-1.3.1.tar.gz +https://ftp.gnu.org/gnu/mpfr/mpfr-4.2.2.tar.xz 
+https://invisible-mirror.net/archives/ncurses/current/ncurses-6.5-20250809.tgz +https://github.com/ninja-build/ninja/archive/v1.13.1/ninja-1.13.1.tar.gz +https://github.com/openssl/openssl/releases/download/openssl-3.5.3/openssl-3.5.3.tar.gz +https://files.pythonhosted.org/packages/source/p/packaging/packaging-25.0.tar.gz +https://ftp.gnu.org/gnu/patch/patch-2.8.tar.xz +https://github.com/PCRE2Project/pcre2/releases/download/pcre2-10.46/pcre2-10.46.tar.bz2 +https://www.cpan.org/src/5.0/perl-5.42.0.tar.xz +https://distfiles.ariadne.space/pkgconf/pkgconf-2.5.1.tar.xz +https://sourceforge.net/projects/procps-ng/files/Production/procps-ng-4.0.5.tar.xz +https://sourceforge.net/projects/psmisc/files/psmisc/psmisc-23.7.tar.xz +https://www.python.org/ftp/python/3.13.7/Python-3.13.7.tar.xz +https://www.python.org/ftp/python/doc/3.13.7/python-3.13.7-docs-html.tar.bz2 +https://ftp.gnu.org/gnu/readline/readline-8.3.tar.gz +https://ftp.gnu.org/gnu/sed/sed-4.9.tar.xz +https://pypi.org/packages/source/s/setuptools/setuptools-80.9.0.tar.gz +https://github.com/shadow-maint/shadow/releases/download/4.18.0/shadow-4.18.0.tar.xz +https://sqlite.org/2025/sqlite-autoconf-3500400.tar.gz +https://anduin.linuxfromscratch.org/LFS/sqlite-doc-3500400.tar.xz +https://github.com/troglobit/sysklogd/releases/download/v2.7.2/sysklogd-2.7.2.tar.gz +https://github.com/systemd/systemd/archive/v257.8/systemd-257.8.tar.gz +https://anduin.linuxfromscratch.org/LFS/systemd-man-pages-257.8.tar.xz +https://github.com/slicer69/sysvinit/releases/download/3.14/sysvinit-3.14.tar.xz +https://ftp.gnu.org/gnu/tar/tar-1.35.tar.xz +https://downloads.sourceforge.net/tcl/tcl8.6.17-src.tar.gz +https://downloads.sourceforge.net/tcl/tcl8.6.17-html.tar.gz +https://ftp.gnu.org/gnu/texinfo/texinfo-7.2.tar.xz +https://www.iana.org/time-zones/repository/releases/tzdata2025b.tar.gz +https://anduin.linuxfromscratch.org/LFS/udev-lfs-20230818.tar.xz +https://www.kernel.org/pub/linux/utils/util-linux/v2.41/util-linux-2.41.2.tar.xz +https://github.com/vim/vim/archive/v9.1.1806/vim-9.1.1806.tar.gz +https://pypi.org/packages/source/w/wheel/wheel-0.46.1.tar.gz +https://cpan.metacpan.org/authors/id/T/TO/TODDR/XML-Parser-2.47.tar.gz +https://github.com//tukaani-project/xz/releases/download/v5.8.1/xz-5.8.1.tar.xz +https://zlib.net/fossils/zlib-1.3.1.tar.gz +https://github.com/facebook/zstd/releases/download/v1.5.7/zstd-1.5.7.tar.gz +https://www.linuxfromscratch.org/patches/lfs/development/bzip2-1.0.8-install_docs-1.patch +https://www.linuxfromscratch.org/patches/lfs/development/coreutils-9.8-i18n-2.patch +https://www.linuxfromscratch.org/patches/lfs/development/expect-5.45.4-gcc15-1.patch +https://www.linuxfromscratch.org/patches/lfs/development/glibc-2.42-fhs-1.patch +https://www.linuxfromscratch.org/patches/lfs/development/kbd-2.9.0-backspace-1.patch +https://www.linuxfromscratch.org/patches/lfs/development/sysvinit-3.14-consolidated-1.patch diff --git a/ai/metadata/index.json b/ai/metadata/index.json new file mode 100644 index 0000000..684b1d8 --- /dev/null +++ b/ai/metadata/index.json @@ -0,0 +1,16 @@ +{ + "generated_at": "2025-10-01T04:35:27.106227+00:00", + "packages": [ + { + "book": "mlfs", + "id": "mlfs/binutils/pass1", + "name": "Binutils", + "path": "packages/mlfs/binutils-pass1.json", + "stage": "cross-toolchain", + "status": "draft", + "variant": "Pass 1", + "version": "2.45" + } + ], + "schema_version": "v0.1.0" +} \ No newline at end of file diff --git a/ai/metadata/packages/mlfs/binutils-pass-1.json 
b/ai/metadata/packages/mlfs/binutils-pass-1.json new file mode 100644 index 0000000..21e4382 --- /dev/null +++ b/ai/metadata/packages/mlfs/binutils-pass-1.json @@ -0,0 +1,113 @@ +{ + "artifacts": { + "disk": 678, + "install_prefix": null, + "sbu": 1.0 + }, + "build": [ + { + "commands": [ + "mkdir -v build", + "cd build" + ], + "cwd": null, + "notes": null, + "phase": "setup", + "requires_root": false + }, + { + "commands": [ + "../configure --prefix=$LFS/tools \\", + "--with-sysroot=$LFS \\", + "--target=$LFS_TGT \\", + "--disable-nls \\", + "--enable-gprofng=no \\", + "--disable-werror \\", + "--enable-new-dtags \\", + "--enable-default-hash-style=gnu" + ], + "cwd": null, + "notes": null, + "phase": "configure", + "requires_root": false + }, + { + "commands": [ + "make" + ], + "cwd": null, + "notes": null, + "phase": "build", + "requires_root": false + }, + { + "commands": [ + "make install" + ], + "cwd": null, + "notes": null, + "phase": "install", + "requires_root": false + } + ], + "dependencies": { + "build": [], + "runtime": [] + }, + "environment": { + "users": [], + "variables": [] + }, + "optimizations": { + "cflags": [ + "-O3", + "-flto" + ], + "enable_lto": true, + "enable_pgo": true, + "ldflags": [ + "-flto" + ], + "profdata": null + }, + "package": { + "anchors": { + "section": "https://linuxfromscratch.org/~thomas/multilib-m32/chapter05/binutils-pass1.html#ch-tools-binutils-pass1" + }, + "book": "mlfs", + "chapter": 5, + "id": "mlfs/binutils-pass-1", + "name": "Binutils", + "section": "5.2", + "stage": "cross-toolchain", + "upstream": null, + "variant": "Pass 1", + "version": "2.45" + }, + "provenance": { + "book_release": "lfs-ml-12.4-40-multilib", + "content_hash": "7c580aad04933a2f6ec5e5410a57695dd2d0b76a293212f33fd3edd226490853", + "page_url": "https://linuxfromscratch.org/~thomas/multilib-m32/chapter05/binutils-pass1.html", + "retrieved_at": "2025-10-01T04:57:22.375928+00:00" + }, + "schema_version": "v0.1.0", + "source": { + "archive": "binutils-2.45.tar.xz", + "checksums": [ + { + "alg": "md5", + "value": "dee5b4267e0305a99a3c9d6131f45759" + } + ], + "urls": [ + { + "kind": "primary", + "url": "https://sourceware.org/pub/binutils/releases/binutils-2.45.tar.xz" + } + ] + }, + "status": { + "issues": [], + "state": "draft" + } +} \ No newline at end of file diff --git a/ai/metadata/packages/mlfs/binutils-pass1.json b/ai/metadata/packages/mlfs/binutils-pass1.json new file mode 100644 index 0000000..303ca2d --- /dev/null +++ b/ai/metadata/packages/mlfs/binutils-pass1.json @@ -0,0 +1,147 @@ +{ + "schema_version": "v0.1.0", + "package": { + "id": "mlfs/binutils/pass1", + "name": "Binutils", + "upstream": "gnu/binutils", + "version": "2.45", + "book": "mlfs", + "chapter": 5, + "section": "5.02", + "stage": "cross-toolchain", + "variant": "Pass 1", + "anchors": { + "section": "https://linuxfromscratch.org/~thomas/multilib-m32/chapter05/binutils-pass1.html#ch-tools-binutils-pass1" + } + }, + "source": { + "urls": [ + { + "url": "https://ftp.gnu.org/gnu/binutils/binutils-2.45.tar.xz", + "kind": "primary" + }, + { + "url": "https://ftpmirror.gnu.org/binutils/binutils-2.45.tar.xz", + "kind": "mirror" + } + ], + "archive": "binutils-2.45.tar.xz" + }, + "artifacts": { + "sbu": 1, + "disk": 678, + "install_prefix": "$LFS/tools" + }, + "dependencies": { + "build": [ + { "name": "bash" }, + { "name": "coreutils" }, + { "name": "gcc", "optional": true } + ], + "runtime": [] + }, + "environment": { + "variables": [ + { + "name": "LFS", + "description": "Absolute path to mounted LFS 
workspace" + }, + { + "name": "LFS_TGT", + "description": "Target triple for cross toolchain" + } + ], + "users": [] + }, + "build": [ + { + "phase": "setup", + "commands": [ + "tar -xf binutils-2.45.tar.xz", + "cd binutils-2.45", + "mkdir -v build", + "cd build" + ] + }, + { + "phase": "configure", + "commands": [ + "../configure --prefix=$LFS/tools \\", + " --with-sysroot=$LFS \\", + " --target=$LFS_TGT \\", + " --disable-nls \\", + " --enable-gprofng=no \\", + " --disable-werror \\", + " --enable-new-dtags \\", + " --enable-default-hash-style=gnu" + ], + "cwd": "build" + }, + { + "phase": "build", + "commands": [ + "make" + ], + "cwd": "build" + }, + { + "phase": "test", + "commands": [ + "make -k check" + ], + "cwd": "build", + "notes": "Tests are optional for cross-toolchain; failures can be ignored" + }, + { + "phase": "install", + "commands": [ + "make install" + ], + "cwd": "build" + } + ], + "optimizations": { + "enable_lto": true, + "enable_pgo": true, + "cflags": ["-O3", "-flto", "-fprofile-generate"], + "ldflags": ["-flto", "-fprofile-generate"], + "profdata": null + }, + "tests": [ + { + "commands": [ + "make -k check" + ], + "optional": true, + "expected_failures": ["gas/run/elf-x86-64-reloc.sh"] + } + ], + "post_install": [ + { + "commands": [ + "rm -v $LFS/tools/lib/libbfd.a", + "rm -v $LFS/tools/lib/libctf-nobfd.a" + ], + "description": "Remove static libraries per LFS guidance" + } + ], + "notes": [ + { + "severity": "warning", + "text": "Ensure the host uses recent flex/bison to avoid configure warnings." + } + ], + "provenance": { + "book_release": "ml-12.4-40-multilib", + "page_url": "https://linuxfromscratch.org/~thomas/multilib-m32/chapter05/binutils-pass1.html", + "retrieved_at": "2025-03-09T00:00:00Z", + "content_hash": "0000000000000000000000000000000000000000000000000000000000000000" + }, + "status": { + "state": "draft", + "issues": [ + "Checksums not yet verified", + "Dependency list requires confirmation" + ] + } +} diff --git a/ai/metadata/schema.json b/ai/metadata/schema.json new file mode 100644 index 0000000..3fae44b --- /dev/null +++ b/ai/metadata/schema.json @@ -0,0 +1,377 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://lpkg.dev/schemas/package-metadata.json", + "title": "LPKG Package Metadata", + "type": "object", + "required": ["schema_version", "package", "source", "build", "provenance", "status"], + "additionalProperties": false, + "properties": { + "schema_version": { + "type": "string", + "pattern": "^v\\d+\\.\\d+\\.\\d+$" + }, + "package": { + "type": "object", + "required": ["id", "name", "version", "book"], + "additionalProperties": false, + "properties": { + "id": { + "type": "string", + "pattern": "^[a-z0-9][a-z0-9-/]*$" + }, + "name": { + "type": "string", + "minLength": 1 + }, + "upstream": { + "type": "string" + }, + "version": { + "type": "string", + "minLength": 1 + }, + "book": { + "type": "string", + "enum": ["lfs", "mlfs", "blfs", "glfs"] + }, + "chapter": { + "type": ["integer", "null"], + "minimum": 0 + }, + "section": { + "type": ["string", "null"], + "pattern": "^\\d+\\.\\d+$" + }, + "stage": { + "type": ["string", "null"], + "enum": [ + "cross-toolchain", + "temporary-tools", + "system", + "system-configuration", + "system-finalization", + "desktop", + "server", + "multilib", + "kernel", + "boot", + null + ] + }, + "variant": { + "type": ["string", "null"], + "minLength": 1 + }, + "anchors": { + "type": "object", + "additionalProperties": { + "type": "string", + "format": "uri" + } + } + } + }, 
+ "source": { + "type": "object", + "required": ["urls"], + "additionalProperties": false, + "properties": { + "urls": { + "type": "array", + "items": { + "type": "object", + "required": ["url"], + "additionalProperties": false, + "properties": { + "url": { + "type": "string", + "format": "uri" + }, + "kind": { + "type": "string", + "enum": ["primary", "mirror", "patch", "signature"] + } + } + } + }, + "archive": { + "type": ["string", "null"] + }, + "checksums": { + "type": "array", + "items": { + "type": "object", + "required": ["alg", "value"], + "additionalProperties": false, + "properties": { + "alg": { + "type": "string", + "enum": ["md5", "sha1", "sha256", "sha512"] + }, + "value": { + "type": "string", + "pattern": "^[A-Fa-f0-9]{16,128}$" + } + } + } + } + } + }, + "artifacts": { + "type": "object", + "additionalProperties": false, + "properties": { + "sbu": { + "type": ["number", "null"], + "minimum": 0 + }, + "disk": { + "type": ["integer", "null"], + "minimum": 0, + "description": "Approximate disk usage in MB" + }, + "install_prefix": { + "type": ["string", "null"], + "minLength": 1 + } + } + }, + "dependencies": { + "type": "object", + "additionalProperties": false, + "properties": { + "build": { + "type": "array", + "items": { + "type": "object", + "required": ["name"], + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "minLength": 1 + }, + "optional": { + "type": "boolean", + "default": false + } + } + } + }, + "runtime": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + } + } + } + }, + "environment": { + "type": "object", + "additionalProperties": false, + "properties": { + "variables": { + "type": "array", + "items": { + "type": "object", + "required": ["name"], + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "pattern": "^[A-Z0-9_]+$" + }, + "required": { + "type": "boolean", + "default": true + }, + "description": { + "type": "string" + } + } + } + }, + "users": { + "type": "array", + "items": { + "type": "object", + "required": ["name"], + "additionalProperties": false, + "properties": { + "name": { + "type": "string", + "minLength": 1 + }, + "purpose": { + "type": "string" + } + } + } + } + } + }, + "build": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "required": ["phase", "commands"], + "additionalProperties": false, + "properties": { + "phase": { + "type": "string", + "enum": ["setup", "configure", "build", "test", "install", "post"] + }, + "commands": { + "type": "array", + "items": { + "type": "string", + "minLength": 1 + } + }, + "cwd": { + "type": ["string", "null"], + "minLength": 1 + }, + "requires_root": { + "type": "boolean", + "default": false + }, + "notes": { + "type": ["string", "null"], + "minLength": 1 + } + } + } + }, + "optimizations": { + "type": "object", + "additionalProperties": false, + "properties": { + "enable_lto": { + "type": "boolean" + }, + "enable_pgo": { + "type": "boolean" + }, + "cflags": { + "type": "array", + "items": { + "type": "string" + } + }, + "ldflags": { + "type": "array", + "items": { + "type": "string" + } + }, + "profdata": { + "type": ["string", "null"], + "minLength": 1 + } + } + }, + "tests": { + "type": "array", + "items": { + "type": "object", + "required": ["commands"], + "additionalProperties": false, + "properties": { + "commands": { + "type": "array", + "items": { + "type": "string" + } + }, + "optional": { + "type": "boolean" + }, + "expected_failures": { + "type": "array", + "items": { + 
"type": "string" + } + } + } + } + }, + "post_install": { + "type": "array", + "items": { + "type": "object", + "required": ["commands"], + "additionalProperties": false, + "properties": { + "commands": { + "type": "array", + "items": { + "type": "string" + } + }, + "description": { + "type": ["string", "null"] + } + } + } + }, + "notes": { + "type": "array", + "items": { + "type": "object", + "required": ["text"], + "additionalProperties": false, + "properties": { + "severity": { + "type": "string", + "enum": ["info", "warning", "error"] + }, + "text": { + "type": "string" + } + } + } + }, + "provenance": { + "type": "object", + "required": ["book_release", "page_url", "retrieved_at"], + "additionalProperties": false, + "properties": { + "book_release": { + "type": "string" + }, + "page_url": { + "type": "string", + "format": "uri" + }, + "retrieved_at": { + "type": "string", + "format": "date-time" + }, + "content_hash": { + "type": "string", + "pattern": "^[A-Fa-f0-9]{64}$" + } + } + }, + "status": { + "type": "object", + "required": ["state"], + "additionalProperties": false, + "properties": { + "state": { + "type": "string", + "enum": ["draft", "review", "imported", "stale"] + }, + "issues": { + "type": "array", + "items": { + "type": "string" + } + } + } + } + } +} diff --git a/ai/notes.md b/ai/notes.md new file mode 100644 index 0000000..fd2e428 --- /dev/null +++ b/ai/notes.md @@ -0,0 +1,15 @@ +# Integrating jhalfs Source Metadata + +- Goal: reuse jhalfs wget-list and md5sums to populate package `source.urls` and +auto-fill checksums when harvesting metadata for MLFS/BLFS/GLFS packages. +- Data source: `https://anduin.linuxfromscratch.org/` hosts per-release + `wget-list`/`md5sums` files already curated by the jhalfs project. +- Approach: + 1. Fetch (and optionally cache under `ai/cache/`) the lists for each book. + 2. When harvesting, map `-` against the list to gather all + relevant URLs. + 3. Pull matching checksum entries to populate `source.checksums`. + 4. Keep the existing HTML scrape for chapter/stage text; jhalfs covers only + sources. +- Benefits: avoids fragile HTML tables, keeps URLs aligned with official build + scripts, and ensures checksums are up-to-date. diff --git a/ai/personas.json b/ai/personas.json new file mode 100644 index 0000000..765b0b4 --- /dev/null +++ b/ai/personas.json @@ -0,0 +1,24 @@ +[ + { + "id": "default_cli", + "name": "Codex CLI Assistant", + "description": "Default persona for repository automation; focuses on safe refactors and tooling improvements.", + "strengths": [ + "Rust and tooling pipelines", + "Workflow automation", + "Incremental migrations" + ], + "notes": "Derived from GPT-5 Codex runtime; avoids destructive operations without explicit approval." + }, + { + "id": "mlfs_researcher", + "name": "MLFS Researcher", + "description": "Persona dedicated to tracking Multilib Linux From Scratch package metadata and translating it into lpkg modules.", + "strengths": [ + "HTML scraping", + "Package manifest synthesis", + "Optimization flag tuning" + ], + "notes": "Activated when working with https://linuxfromscratch.org/~thomas/multilib-m32/ resources." 
+ } +] diff --git a/ai/tasks.json b/ai/tasks.json new file mode 100644 index 0000000..abb56e2 --- /dev/null +++ b/ai/tasks.json @@ -0,0 +1,56 @@ +{ + "generated_at": "2025-03-09T00:00:00Z", + "unfinished": [ + { + "id": "mlfs-package-import", + "title": "Import all MLFS packages into lpkg", + "description": "Parse the Multilib LFS book and scaffold package definitions with optimization defaults (LTO/PGO/-O3).", + "blocked_on": [ + "Implement automated parser" + ], + "owner": "mlfs_researcher" + }, + { + "id": "pgo-integration", + "title": "Integrate profile guided optimization support", + "description": "Add infrastructure for collection and replay of profiling data during package builds.", + "blocked_on": [ + "Decide on profiling workload definitions" + ], + "owner": "default_cli" + }, + { + "id": "lfs-html-parsers", + "title": "Automate LFS/BLFS/GLFS ingest via HTML parsing", + "description": "Avoid hardcoded package data; download the upstream books (LFS, BLFS, GLFS) and parse them to drive scaffolding and metadata updates.", + "blocked_on": [ + "Design resilient scraping strategies for each book", + "Implement incremental update workflow" + ], + "owner": "mlfs_researcher" + } + ], + "solved": [ + { + "id": "ai-metadata-store", + "title": "Create AI metadata directory", + "description": "Introduce ai/personas.json, ai/tasks.json, ai/bugs.json for persistent assistant context.", + "resolution": "Initial JSON files checked in with placeholder content.", + "owner": "default_cli" + }, + { + "id": "metadata-schema-v0.1", + "title": "Define package metadata schema", + "description": "Specify JSON schema and layout for storing scraped package detail from LFS family books.", + "resolution": "Added ai/metadata/schema.json with v0.1.0 structure and seeded initial package entry/index.", + "owner": "default_cli" + }, + { + "id": "metadata-indexer-cli", + "title": "Build metadata validation/indexing tool", + "description": "Provide a standalone CLI to validate package metadata against the schema and regenerate ai/metadata/index.json.", + "resolution": "Added src/bin/metadata_indexer.rs with schema validation, summary extraction, and index writer integration.", + "owner": "default_cli" + } + ] +} diff --git a/data/mlfs_ml-12.4-40-multilib.json b/data/mlfs_ml-12.4-40-multilib.json new file mode 100644 index 0000000..a5d88e9 --- /dev/null +++ b/data/mlfs_ml-12.4-40-multilib.json @@ -0,0 +1,1019 @@ +[ + { + "chapter": 5, + "name": "binutils", + "notes": null, + "section": "Binutils-2.45 - Pass 1", + "stage": "cross-toolchain", + "variant": "pass1", + "version": "2.45" + }, + { + "chapter": 5, + "name": "gcc", + "notes": null, + "section": "GCC-15.2.0 - Pass 1", + "stage": "cross-toolchain", + "variant": "pass1", + "version": "15.2.0" + }, + { + "chapter": 5, + "name": "linux-api-headers", + "notes": null, + "section": "Linux-6.16.9 API Headers", + "stage": "cross-toolchain", + "variant": null, + "version": "6.16.9" + }, + { + "chapter": 5, + "name": "glibc", + "notes": "Temporary toolchain", + "section": "Glibc-2.42", + "stage": "cross-toolchain", + "variant": null, + "version": "2.42" + }, + { + "chapter": 5, + "name": "libstdc++", + "notes": null, + "section": "Libstdc++ from GCC-15.2.0", + "stage": "cross-toolchain", + "variant": null, + "version": "15.2.0" + }, + { + "chapter": 6, + "name": "m4", + "notes": null, + "section": "M4-1.4.20", + "stage": "temporary-tools", + "variant": null, + "version": "1.4.20" + }, + { + "chapter": 6, + "name": "ncurses", + "notes": null, + "section": 
"Ncurses-6.5-20250809", + "stage": "temporary-tools", + "variant": null, + "version": "6.5-20250809" + }, + { + "chapter": 6, + "name": "bash", + "notes": null, + "section": "Bash-5.3", + "stage": "temporary-tools", + "variant": null, + "version": "5.3" + }, + { + "chapter": 6, + "name": "coreutils", + "notes": null, + "section": "Coreutils-9.8", + "stage": "temporary-tools", + "variant": null, + "version": "9.8" + }, + { + "chapter": 6, + "name": "diffutils", + "notes": null, + "section": "Diffutils-3.12", + "stage": "temporary-tools", + "variant": null, + "version": "3.12" + }, + { + "chapter": 6, + "name": "file", + "notes": null, + "section": "File-5.46", + "stage": "temporary-tools", + "variant": null, + "version": "5.46" + }, + { + "chapter": 6, + "name": "findutils", + "notes": null, + "section": "Findutils-4.10.0", + "stage": "temporary-tools", + "variant": null, + "version": "4.10.0" + }, + { + "chapter": 6, + "name": "gawk", + "notes": null, + "section": "Gawk-5.3.2", + "stage": "temporary-tools", + "variant": null, + "version": "5.3.2" + }, + { + "chapter": 6, + "name": "grep", + "notes": null, + "section": "Grep-3.12", + "stage": "temporary-tools", + "variant": null, + "version": "3.12" + }, + { + "chapter": 6, + "name": "gzip", + "notes": null, + "section": "Gzip-1.14", + "stage": "temporary-tools", + "variant": null, + "version": "1.14" + }, + { + "chapter": 6, + "name": "make", + "notes": null, + "section": "Make-4.4.1", + "stage": "temporary-tools", + "variant": null, + "version": "4.4.1" + }, + { + "chapter": 6, + "name": "patch", + "notes": null, + "section": "Patch-2.8", + "stage": "temporary-tools", + "variant": null, + "version": "2.8" + }, + { + "chapter": 6, + "name": "sed", + "notes": null, + "section": "Sed-4.9", + "stage": "temporary-tools", + "variant": null, + "version": "4.9" + }, + { + "chapter": 6, + "name": "tar", + "notes": null, + "section": "Tar-1.35", + "stage": "temporary-tools", + "variant": null, + "version": "1.35" + }, + { + "chapter": 6, + "name": "xz", + "notes": null, + "section": "Xz-5.8.1", + "stage": "temporary-tools", + "variant": null, + "version": "5.8.1" + }, + { + "chapter": 6, + "name": "binutils", + "notes": null, + "section": "Binutils-2.45 - Pass 2", + "stage": "temporary-tools", + "variant": "pass2", + "version": "2.45" + }, + { + "chapter": 6, + "name": "gcc", + "notes": null, + "section": "GCC-15.2.0 - Pass 2", + "stage": "temporary-tools", + "variant": "pass2", + "version": "15.2.0" + }, + { + "chapter": 7, + "name": "gettext", + "notes": null, + "section": "Gettext-0.26", + "stage": "temporary-tools", + "variant": null, + "version": "0.26" + }, + { + "chapter": 7, + "name": "bison", + "notes": null, + "section": "Bison-3.8.2", + "stage": "temporary-tools", + "variant": null, + "version": "3.8.2" + }, + { + "chapter": 7, + "name": "perl", + "notes": null, + "section": "Perl-5.42.0", + "stage": "temporary-tools", + "variant": null, + "version": "5.42.0" + }, + { + "chapter": 7, + "name": "python", + "notes": null, + "section": "Python-3.13.7", + "stage": "temporary-tools", + "variant": null, + "version": "3.13.7" + }, + { + "chapter": 7, + "name": "texinfo", + "notes": null, + "section": "Texinfo-7.2", + "stage": "temporary-tools", + "variant": null, + "version": "7.2" + }, + { + "chapter": 7, + "name": "util-linux", + "notes": null, + "section": "Util-linux-2.41.2", + "stage": "temporary-tools", + "variant": null, + "version": "2.41.2" + }, + { + "chapter": 8, + "name": "man-pages", + "notes": null, + "section": "Man-pages-6.15", 
+ "stage": "system", + "variant": null, + "version": "6.15" + }, + { + "chapter": 8, + "name": "iana-etc", + "notes": null, + "section": "Iana-Etc-20250926", + "stage": "system", + "variant": null, + "version": "20250926" + }, + { + "chapter": 8, + "name": "glibc", + "notes": null, + "section": "Glibc-2.42", + "stage": "system", + "variant": null, + "version": "2.42" + }, + { + "chapter": 8, + "name": "zlib", + "notes": null, + "section": "Zlib-1.3.1", + "stage": "system", + "variant": null, + "version": "1.3.1" + }, + { + "chapter": 8, + "name": "bzip2", + "notes": null, + "section": "Bzip2-1.0.8", + "stage": "system", + "variant": null, + "version": "1.0.8" + }, + { + "chapter": 8, + "name": "xz", + "notes": null, + "section": "Xz-5.8.1", + "stage": "system", + "variant": null, + "version": "5.8.1" + }, + { + "chapter": 8, + "name": "lz4", + "notes": null, + "section": "Lz4-1.10.0", + "stage": "system", + "variant": null, + "version": "1.10.0" + }, + { + "chapter": 8, + "name": "zstd", + "notes": null, + "section": "Zstd-1.5.7", + "stage": "system", + "variant": null, + "version": "1.5.7" + }, + { + "chapter": 8, + "name": "file", + "notes": null, + "section": "File-5.46", + "stage": "system", + "variant": null, + "version": "5.46" + }, + { + "chapter": 8, + "name": "readline", + "notes": null, + "section": "Readline-8.3", + "stage": "system", + "variant": null, + "version": "8.3" + }, + { + "chapter": 8, + "name": "pcre2", + "notes": null, + "section": "Pcre2-10.46", + "stage": "system", + "variant": null, + "version": "10.46" + }, + { + "chapter": 8, + "name": "m4", + "notes": null, + "section": "M4-1.4.20", + "stage": "system", + "variant": null, + "version": "1.4.20" + }, + { + "chapter": 8, + "name": "bc", + "notes": null, + "section": "Bc-7.0.3", + "stage": "system", + "variant": null, + "version": "7.0.3" + }, + { + "chapter": 8, + "name": "flex", + "notes": null, + "section": "Flex-2.6.4", + "stage": "system", + "variant": null, + "version": "2.6.4" + }, + { + "chapter": 8, + "name": "tcl", + "notes": null, + "section": "Tcl-8.6.17", + "stage": "system", + "variant": null, + "version": "8.6.17" + }, + { + "chapter": 8, + "name": "expect", + "notes": null, + "section": "Expect-5.45.4", + "stage": "system", + "variant": null, + "version": "5.45.4" + }, + { + "chapter": 8, + "name": "dejagnu", + "notes": null, + "section": "DejaGNU-1.6.3", + "stage": "system", + "variant": null, + "version": "1.6.3" + }, + { + "chapter": 8, + "name": "pkgconf", + "notes": null, + "section": "Pkgconf-2.5.1", + "stage": "system", + "variant": null, + "version": "2.5.1" + }, + { + "chapter": 8, + "name": "binutils", + "notes": null, + "section": "Binutils-2.45", + "stage": "system", + "variant": null, + "version": "2.45" + }, + { + "chapter": 8, + "name": "gmp", + "notes": null, + "section": "GMP-6.3.0", + "stage": "system", + "variant": null, + "version": "6.3.0" + }, + { + "chapter": 8, + "name": "mpfr", + "notes": null, + "section": "MPFR-4.2.2", + "stage": "system", + "variant": null, + "version": "4.2.2" + }, + { + "chapter": 8, + "name": "mpc", + "notes": null, + "section": "MPC-1.3.1", + "stage": "system", + "variant": null, + "version": "1.3.1" + }, + { + "chapter": 8, + "name": "isl", + "notes": null, + "section": "ISL-0.27", + "stage": "system", + "variant": null, + "version": "0.27" + }, + { + "chapter": 8, + "name": "attr", + "notes": null, + "section": "Attr-2.5.2", + "stage": "system", + "variant": null, + "version": "2.5.2" + }, + { + "chapter": 8, + "name": "acl", + "notes": null, + 
"section": "Acl-2.3.2", + "stage": "system", + "variant": null, + "version": "2.3.2" + }, + { + "chapter": 8, + "name": "libcap", + "notes": null, + "section": "Libcap-2.76", + "stage": "system", + "variant": null, + "version": "2.76" + }, + { + "chapter": 8, + "name": "libxcrypt", + "notes": null, + "section": "Libxcrypt-4.4.38", + "stage": "system", + "variant": null, + "version": "4.4.38" + }, + { + "chapter": 8, + "name": "shadow", + "notes": null, + "section": "Shadow-4.18.0", + "stage": "system", + "variant": null, + "version": "4.18.0" + }, + { + "chapter": 8, + "name": "gcc", + "notes": null, + "section": "GCC-15.2.0", + "stage": "system", + "variant": null, + "version": "15.2.0" + }, + { + "chapter": 8, + "name": "ncurses", + "notes": null, + "section": "Ncurses-6.5-20250809", + "stage": "system", + "variant": null, + "version": "6.5-20250809" + }, + { + "chapter": 8, + "name": "sed", + "notes": null, + "section": "Sed-4.9", + "stage": "system", + "variant": null, + "version": "4.9" + }, + { + "chapter": 8, + "name": "psmisc", + "notes": null, + "section": "Psmisc-23.7", + "stage": "system", + "variant": null, + "version": "23.7" + }, + { + "chapter": 8, + "name": "gettext", + "notes": null, + "section": "Gettext-0.26", + "stage": "system", + "variant": null, + "version": "0.26" + }, + { + "chapter": 8, + "name": "bison", + "notes": null, + "section": "Bison-3.8.2", + "stage": "system", + "variant": null, + "version": "3.8.2" + }, + { + "chapter": 8, + "name": "grep", + "notes": null, + "section": "Grep-3.12", + "stage": "system", + "variant": null, + "version": "3.12" + }, + { + "chapter": 8, + "name": "bash", + "notes": null, + "section": "Bash-5.3", + "stage": "system", + "variant": null, + "version": "5.3" + }, + { + "chapter": 8, + "name": "libtool", + "notes": null, + "section": "Libtool-2.5.4", + "stage": "system", + "variant": null, + "version": "2.5.4" + }, + { + "chapter": 8, + "name": "gdbm", + "notes": null, + "section": "GDBM-1.26", + "stage": "system", + "variant": null, + "version": "1.26" + }, + { + "chapter": 8, + "name": "gperf", + "notes": null, + "section": "Gperf-3.3", + "stage": "system", + "variant": null, + "version": "3.3" + }, + { + "chapter": 8, + "name": "expat", + "notes": null, + "section": "Expat-2.7.3", + "stage": "system", + "variant": null, + "version": "2.7.3" + }, + { + "chapter": 8, + "name": "inetutils", + "notes": null, + "section": "Inetutils-2.6", + "stage": "system", + "variant": null, + "version": "2.6" + }, + { + "chapter": 8, + "name": "less", + "notes": null, + "section": "Less-679", + "stage": "system", + "variant": null, + "version": "679" + }, + { + "chapter": 8, + "name": "perl", + "notes": null, + "section": "Perl-5.42.0", + "stage": "system", + "variant": null, + "version": "5.42.0" + }, + { + "chapter": 8, + "name": "xml-parser", + "notes": null, + "section": "XML::Parser-2.47", + "stage": "system", + "variant": null, + "version": "2.47" + }, + { + "chapter": 8, + "name": "intltool", + "notes": null, + "section": "Intltool-0.51.0", + "stage": "system", + "variant": null, + "version": "0.51.0" + }, + { + "chapter": 8, + "name": "autoconf", + "notes": null, + "section": "Autoconf-2.72", + "stage": "system", + "variant": null, + "version": "2.72" + }, + { + "chapter": 8, + "name": "automake", + "notes": null, + "section": "Automake-1.18.1", + "stage": "system", + "variant": null, + "version": "1.18.1" + }, + { + "chapter": 8, + "name": "openssl", + "notes": null, + "section": "OpenSSL-3.5.3", + "stage": "system", + "variant": null, 
+ "version": "3.5.3" + }, + { + "chapter": 8, + "name": "elfutils-libelf", + "notes": null, + "section": "Libelf from Elfutils-0.193", + "stage": "system", + "variant": null, + "version": "0.193" + }, + { + "chapter": 8, + "name": "libffi", + "notes": null, + "section": "Libffi-3.5.2", + "stage": "system", + "variant": null, + "version": "3.5.2" + }, + { + "chapter": 8, + "name": "sqlite", + "notes": null, + "section": "Sqlite-3500400", + "stage": "system", + "variant": null, + "version": "3500400" + }, + { + "chapter": 8, + "name": "python", + "notes": null, + "section": "Python-3.13.7", + "stage": "system", + "variant": null, + "version": "3.13.7" + }, + { + "chapter": 8, + "name": "flit-core", + "notes": null, + "section": "Flit-Core-3.12.0", + "stage": "system", + "variant": null, + "version": "3.12.0" + }, + { + "chapter": 8, + "name": "packaging", + "notes": null, + "section": "Packaging-25.0", + "stage": "system", + "variant": null, + "version": "25.0" + }, + { + "chapter": 8, + "name": "wheel", + "notes": null, + "section": "Wheel-0.46.1", + "stage": "system", + "variant": null, + "version": "0.46.1" + }, + { + "chapter": 8, + "name": "setuptools", + "notes": null, + "section": "Setuptools-80.9.0", + "stage": "system", + "variant": null, + "version": "80.9.0" + }, + { + "chapter": 8, + "name": "ninja", + "notes": null, + "section": "Ninja-1.13.1", + "stage": "system", + "variant": null, + "version": "1.13.1" + }, + { + "chapter": 8, + "name": "meson", + "notes": null, + "section": "Meson-1.9.1", + "stage": "system", + "variant": null, + "version": "1.9.1" + }, + { + "chapter": 8, + "name": "kmod", + "notes": null, + "section": "Kmod-34.2", + "stage": "system", + "variant": null, + "version": "34.2" + }, + { + "chapter": 8, + "name": "coreutils", + "notes": null, + "section": "Coreutils-9.8", + "stage": "system", + "variant": null, + "version": "9.8" + }, + { + "chapter": 8, + "name": "diffutils", + "notes": null, + "section": "Diffutils-3.12", + "stage": "system", + "variant": null, + "version": "3.12" + }, + { + "chapter": 8, + "name": "gawk", + "notes": null, + "section": "Gawk-5.3.2", + "stage": "system", + "variant": null, + "version": "5.3.2" + }, + { + "chapter": 8, + "name": "findutils", + "notes": null, + "section": "Findutils-4.10.0", + "stage": "system", + "variant": null, + "version": "4.10.0" + }, + { + "chapter": 8, + "name": "groff", + "notes": null, + "section": "Groff-1.23.0", + "stage": "system", + "variant": null, + "version": "1.23.0" + }, + { + "chapter": 8, + "name": "grub", + "notes": null, + "section": "GRUB-2.12", + "stage": "system", + "variant": null, + "version": "2.12" + }, + { + "chapter": 8, + "name": "gzip", + "notes": null, + "section": "Gzip-1.14", + "stage": "system", + "variant": null, + "version": "1.14" + }, + { + "chapter": 8, + "name": "iproute2", + "notes": null, + "section": "IPRoute2-6.16.0", + "stage": "system", + "variant": null, + "version": "6.16.0" + }, + { + "chapter": 8, + "name": "kbd", + "notes": null, + "section": "Kbd-2.9.0", + "stage": "system", + "variant": null, + "version": "2.9.0" + }, + { + "chapter": 8, + "name": "libpipeline", + "notes": null, + "section": "Libpipeline-1.5.8", + "stage": "system", + "variant": null, + "version": "1.5.8" + }, + { + "chapter": 8, + "name": "make", + "notes": null, + "section": "Make-4.4.1", + "stage": "system", + "variant": null, + "version": "4.4.1" + }, + { + "chapter": 8, + "name": "patch", + "notes": null, + "section": "Patch-2.8", + "stage": "system", + "variant": null, + "version": 
"2.8" + }, + { + "chapter": 8, + "name": "tar", + "notes": null, + "section": "Tar-1.35", + "stage": "system", + "variant": null, + "version": "1.35" + }, + { + "chapter": 8, + "name": "texinfo", + "notes": null, + "section": "Texinfo-7.2", + "stage": "system", + "variant": null, + "version": "7.2" + }, + { + "chapter": 8, + "name": "vim", + "notes": null, + "section": "Vim-9.1.1806", + "stage": "system", + "variant": null, + "version": "9.1.1806" + }, + { + "chapter": 8, + "name": "markupsafe", + "notes": null, + "section": "MarkupSafe-3.0.3", + "stage": "system", + "variant": null, + "version": "3.0.3" + }, + { + "chapter": 8, + "name": "jinja2", + "notes": null, + "section": "Jinja2-3.1.6", + "stage": "system", + "variant": null, + "version": "3.1.6" + }, + { + "chapter": 8, + "name": "systemd-udev", + "notes": null, + "section": "Udev from Systemd-257.8", + "stage": "system", + "variant": null, + "version": "257.8" + }, + { + "chapter": 8, + "name": "man-db", + "notes": null, + "section": "Man-DB-2.13.1", + "stage": "system", + "variant": null, + "version": "2.13.1" + }, + { + "chapter": 8, + "name": "procps-ng", + "notes": null, + "section": "Procps-ng-4.0.5", + "stage": "system", + "variant": null, + "version": "4.0.5" + }, + { + "chapter": 8, + "name": "util-linux", + "notes": null, + "section": "Util-linux-2.41.2", + "stage": "system", + "variant": null, + "version": "2.41.2" + }, + { + "chapter": 8, + "name": "e2fsprogs", + "notes": null, + "section": "E2fsprogs-1.47.3", + "stage": "system", + "variant": null, + "version": "1.47.3" + }, + { + "chapter": 8, + "name": "sysklogd", + "notes": null, + "section": "Sysklogd-2.7.2", + "stage": "system", + "variant": null, + "version": "2.7.2" + }, + { + "chapter": 8, + "name": "sysvinit", + "notes": null, + "section": "SysVinit-3.14", + "stage": "system", + "variant": null, + "version": "3.14" + }, + { + "chapter": 9, + "name": "lfs-bootscripts", + "notes": null, + "section": "LFS-Bootscripts-20250827", + "stage": "system", + "variant": null, + "version": "20250827" + }, + { + "chapter": 10, + "name": "linux", + "notes": "Final kernel build", + "section": "Linux-6.16.9", + "stage": "system", + "variant": null, + "version": "6.16.9" + } +] diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md new file mode 100644 index 0000000..19f9fb8 --- /dev/null +++ b/docs/ARCHITECTURE.md @@ -0,0 +1,117 @@ +# Architecture Overview + +This project is split into a reusable Rust library crate (`package_management`) +and several binaries that orchestrate day-to-day workflows. The sections below +outline the main entry points and how the supporting modules fit together. + +## CLI entry points + +| Binary | Location | Purpose | +| ------ | -------- | ------- | +| `lpkg` | `src/main.rs` | Primary command-line interface with workflow automation and optional TUI integration. | +| `metadata_indexer` | `src/bin/metadata_indexer.rs` | Harvests LFS/BLFS/GLFS package metadata, validates it against the JSON schema, and keeps `ai/metadata/index.json` up to date. | + +### `lpkg` workflows + +`lpkg` uses [Clap](https://docs.rs/clap) to expose multiple subcommands: + +- `EnvCheck` – fetches `
<pre class="userinput">` blocks from an LFS-style HTML page and runs the
+  embedded `ver_check` / `ver_kernel` scripts.
+- `FetchManifests` – downloads the book’s canonical `wget-list` and `md5sums`
+  files and writes them to disk.
+- `BuildBinutils` – parses the Binutils Pass 1 page, mirrors the documented
+  build steps, and executes them in a Tokio runtime.
+- `ScaffoldPackage` – generates a new module under `src/pkgs/by_name/` with
+  optimisation defaults (LTO/PGO/`-O3`) and persists metadata via the DB
+  helpers.
+- `ImportMlfs` – walks the MLFS catalogue, scaffolding definitions and storing
+  them in the database (with optional `--dry-run`, `--limit`, and `--overwrite`).
+
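+The subcommand surface can be pictured as a Clap derive enum along the lines of
+the sketch below. Variant names and the `ImportMlfs` flags mirror the list
+above; everything else (argument types, per-command options, the `about` text)
+is illustrative and not the actual `src/main.rs` definition.
+
+```rust
+use clap::{Parser, Subcommand};
+
+#[derive(Parser)]
+#[command(name = "lpkg", about = "LFS package workflow automation")]
+struct Cli {
+    #[command(subcommand)]
+    command: Option<Command>,
+}
+
+#[derive(Subcommand)]
+enum Command {
+    /// Check host tool versions via the book's ver_check / ver_kernel scripts.
+    EnvCheck,
+    /// Download the canonical wget-list and md5sums manifests.
+    FetchManifests,
+    /// Parse and execute the Binutils Pass 1 build steps.
+    BuildBinutils,
+    /// Generate a package module under src/pkgs/by_name/ with LTO/PGO/-O3 defaults.
+    ScaffoldPackage,
+    /// Walk the MLFS catalogue and store definitions in the database.
+    ImportMlfs {
+        #[arg(long)]
+        dry_run: bool,
+        #[arg(long)]
+        limit: Option<usize>,
+        #[arg(long)]
+        overwrite: bool,
+    },
+}
+
+fn main() {
+    let _cli = Cli::parse();
+}
+```
+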
+When compiled with the `tui` feature flag, the CLI also exposes
+`lpkg tui disk-manager`, which drops the user into the terminal UI defined in
+`src/tui/`.
+
+### `metadata_indexer`
+
+The `metadata_indexer` binary is a companion tool for maintaining the JSON
+artifacts under `ai/metadata/`:
+
+- `validate` – validates every `packages/**/*.json` file against
+  `ai/metadata/schema.json` and reports schema or summary extraction issues.
+- `index` – revalidates the metadata and regenerates
+  `ai/metadata/index.json` (use `--compact` for single-line JSON).
+- `harvest` – fetches a given book page, extracts build metadata, and emits a
+  schema-compliant JSON skeleton. When direct HTML parsing does not locate the
+  source tarball, it falls back to the jhalfs `wget-list` data to populate
+  `source.urls`.
+
+## Module layout
+
+```
+src/
+  ai/             // JSON loaders for repository personas, tasks, and bugs
+  db/             // Diesel database setup and models
+  html.rs         // Lightweight HTML helpers (fetch + parse <pre class="userinput"> blocks)
+  ingest/         // Parsers for LFS / MLFS / BLFS / GLFS book content
+  md5_utils.rs    // Fetches canonical md5sums from the book mirror
+  mirrors.rs      // Lists official source mirrors for downloads
+  pkgs/           // Package scaffolding and metadata definition helpers
+  tui/            // Optional terminal UI (crossterm + tui)
+  version_check.rs// Executes ver_check / ver_kernel snippets
+  wget_list.rs    // Fetches jhalfs-maintained wget-list manifests
+  bin/metadata_indexer.rs // AI metadata CLI described above
+```
+
+### Notable modules
+
+- **`src/pkgs/scaffolder.rs`**
+  - Generates filesystem modules and `PackageDefinition` records based on a
+    `ScaffoldRequest`.
+  - Normalises directory layout (prefix modules, `mod.rs` entries) and applies
+    optimisation defaults (LTO, PGO, `-O3`); a sketch of these defaults
+    follows this list.
+
+- **`src/ingest/`**
+  - Provides HTML parsers tailored to each book flavour (LFS, MLFS, BLFS,
+    GLFS). The parsers emit `BookPackage` records consumed by the scaffolder
+    and metadata importer.
+
+- **`src/db/`**
+  - Diesel models and schema for persisting package metadata. `lpkg` uses these
+    helpers when scaffolding or importing packages.
+
+- **`src/tui/`**
+  - Houses the optional terminal interface (disk manager, main menu, settings,
+    downloader). The entry points are conditionally compiled behind the `tui`
+    cargo feature.
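+
+To make the scaffolder's optimisation defaults concrete, the sketch below
+models them as a plain struct. Field names and default values follow the
+`optimizations` block of `ai/metadata/schema.json`; the struct itself is
+illustrative and does not claim to match the types in `src/pkgs/`.
+
+```rust
+/// Illustrative only: mirrors the `optimizations` block of the metadata schema.
+#[derive(Debug, Clone)]
+pub struct OptimizationDefaults {
+    pub enable_lto: bool,
+    pub enable_pgo: bool,
+    pub cflags: Vec<String>,
+    pub ldflags: Vec<String>,
+    pub profdata: Option<String>,
+}
+
+impl Default for OptimizationDefaults {
+    fn default() -> Self {
+        Self {
+            enable_lto: true,
+            enable_pgo: true,
+            cflags: vec!["-O3".into(), "-flto".into()],
+            ldflags: vec!["-flto".into()],
+            profdata: None,
+        }
+    }
+}
+```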
+
+## Data & metadata assets
+
+The repository keeps long-lived artifacts under `ai/`:
+
+- `ai/metadata/` – JSON schema (`schema.json`), package records, and a generated
+  index (`index.json`). The `metadata_indexer` binary maintains these files.
+- `ai/personas.json`, `ai/tasks.json`, `ai/bugs.json` – contextual data for
+  automated assistance.
+- `ai/notes.md` – scratchpad for future work (e.g., jhalfs integration).
+
+`data/` currently contains catalogues derived from the MLFS book and can be
+extended with additional book snapshots.
+
+## Database and persistence
+
+The Diesel setup uses SQLite (via the `diesel` crate with `sqlite` and `r2d2`
+features enabled). Connection pooling lives in `src/db/mod.rs` and is consumed
+by workflows that scaffold or import packages.
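+
+A typical workflow obtains the pool once and pulls connections from it as
+needed. The sketch below shows that flow, assuming the crate is used as
+`package_management`; the actual insert/update helper is deliberately left
+unnamed here.
+
+```rust
+use anyhow::Result;
+use package_management::db;
+
+fn persist_scaffolded_packages() -> Result<()> {
+    // establish_pool() builds the r2d2 pool and creates the schema if needed.
+    let pool = db::establish_pool()?;
+    let mut conn = pool.get()?;
+    // A scaffolding or import workflow would now write package rows through
+    // the db helpers (see src/db/mod.rs for the upsert entry point).
+    let _ = &mut conn;
+    Ok(())
+}
+
+fn main() -> Result<()> {
+    persist_scaffolded_packages()
+}
+```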
+
+## Optional terminal UI
+
+The TUI revolves around `DiskManager` (a crossterm + tui-based interface for
+GPT partition inspection and creation). Additional stubs (`main_menu.rs`,
+`settings.rs`, `downloader.rs`) are present for future expansion. When `lpkg`
+is compiled with `--features tui` and invoked without a subcommand, the main
+CLI falls back to `DiskManager::run_tui()`.
+
+---
+
+For more operational details around metadata harvesting, refer to
+[`docs/METADATA_PIPELINE.md`](./METADATA_PIPELINE.md).
diff --git a/docs/METADATA_PIPELINE.md b/docs/METADATA_PIPELINE.md
new file mode 100644
index 0000000..895031c
--- /dev/null
+++ b/docs/METADATA_PIPELINE.md
@@ -0,0 +1,83 @@
+# Metadata Harvesting Pipeline
+
+This repository tracks AI-friendly package metadata under `ai/metadata/`.
+The `metadata_indexer` binary orchestrates validation and harvesting tasks.
+This document explains the workflow and the supporting assets.
+
+## Directory layout
+
+- `ai/metadata/schema.json` – JSON Schema (Draft 2020-12) describing one
+  package record.
+- `ai/metadata/packages/<book>/<package>.json` – harvested package metadata.
+- `ai/metadata/index.json` – generated summary table linking package IDs to
+  their JSON files.
+- `ai/notes.md` – scratchpad for future improvements (e.g., jhalfs integration).
+
+## `metadata_indexer` commands
+
+| Command | Description |
+| ------- | ----------- |
+| `validate` | Loads every package JSON file and validates it against `schema.json`. Reports schema violations and summary extraction errors. |
+| `index` | Re-runs validation and regenerates `index.json`. Use `--compact` to write a single-line JSON payload. |
+| `harvest` | Fetches a book page, scrapes build instructions, and emits a draft metadata record (to stdout with `--dry-run` or into `ai/metadata/packages/`). |
+
+### Harvesting flow
+
+1. **Fetch HTML** – the requested page is downloaded with `reqwest` and parsed
+   using `scraper` selectors.
+2. **Heading metadata** – the `h1.sect1` title provides the chapter/section,
+   canonical package name, version, and optional variant hints (see the
+   sketch after this list).
+3. **Build steps** – `<pre class="userinput">` blocks become ordered `build`
+   phases (`setup`, `configure`, `build`, `test`, `install`).
+4. **Artifact stats** – `div.segmentedlist` entries supply SBU and disk usage.
+5. **Source URLs** – the harvester tries two strategies:
+   - Inline HTML links inside the page (common for BLFS articles).
+   - Fallback to the jhalfs `wget-list` for the selected book (currently MLFS)
+     using `package_management::wget_list::get_wget_list` to find matching
+     `<name>-<version>` entries.
+6. **Checksums** – integration with the book’s `md5sums` mirror is pending;
+   placeholder wiring exists (`src/md5_utils.rs`).
+7. **Status** – unresolved items (missing URLs, anchors, etc.) are recorded in
+   `status.issues` so humans can interrogate or patch the draft before
+   promoting it.
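+
+The heading parse in step 2 boils down to a single regex over the normalised
+`<h1 class="sect1">` text. A trimmed-down sketch (the indexer uses the same
+named groups; the surrounding plumbing is omitted):
+
+```rust
+use regex::Regex;
+
+/// Split a heading like "5.2. Binutils-2.45 - Pass 1" into section and title.
+fn parse_heading(heading: &str) -> Option<(String, String)> {
+    let re = Regex::new(r"^(?P<section>\d+\.\d+)\.\s+(?P<title>.+)$").ok()?;
+    let caps = re.captures(heading)?;
+    Some((caps["section"].to_string(), caps["title"].trim().to_string()))
+}
+
+fn main() {
+    assert_eq!(
+        parse_heading("5.2. Binutils-2.45 - Pass 1"),
+        Some(("5.2".to_string(), "Binutils-2.45 - Pass 1".to_string()))
+    );
+}
+```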
+
+### Known gaps
+
+- **Source links via tables** – some MLFS chapters list download links inside a
+  “Package Information” table. The current implementation relies on the
+  jhalfs `wget-list` fallback instead of parsing that table.
+- **Checksums** – MD5 lookups from jhalfs are planned but not yet wired into
+  the harvest pipeline.
+- **Anchor discovery** – if the heading lacks an explicit `id` attribute, the
+  scraper attempts to locate child anchors or scan the raw HTML. If none are
+  found, a warning is recorded and `status.issues` contains a reminder.
+
+## Using jhalfs manifests
+
+The maintained `wget-list`/`md5sums` files hosted by jhalfs provide canonical
+source URLs and hashes. The helper modules `src/wget_list.rs` and
+`src/md5_utils.rs` download these lists for the multilib LFS book. The
+harvester currently consumes the wget-list as a fallback; integrating the
+`md5sums` file will let us emit `source.checksums` automatically.
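+
+The checksum lookup itself is a line-by-line match of the archive filename
+against the two-column `md5sums` manifest; the indexer's `resolve_checksums`
+helper follows the same shape. A minimal sketch (the digest in the example is
+a placeholder, not a real checksum):
+
+```rust
+/// Find the MD5 digest recorded for `archive` in a jhalfs-style md5sums manifest.
+fn md5_for_archive(manifest: &str, archive: &str) -> Option<String> {
+    manifest.lines().find_map(|line| {
+        let mut parts = line.split_whitespace();
+        let hash = parts.next()?;
+        let file = parts.next()?;
+        (file == archive).then(|| hash.to_lowercase())
+    })
+}
+
+fn main() {
+    // Placeholder digest for illustration only.
+    let manifest = "0123456789abcdef0123456789abcdef  binutils-2.45.tar.xz\n";
+    assert_eq!(
+        md5_for_archive(manifest, "binutils-2.45.tar.xz").as_deref(),
+        Some("0123456789abcdef0123456789abcdef")
+    );
+}
+```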
+
+Planned enhancements (see `ai/notes.md` and `ai/bugs.json#metadata-harvest-no-source-urls`):
+
+1. Abstract list fetching so BLFS/GLFS variants can reuse the logic.
+2. Normalise the match criteria for package + version (handling pass stages,
+   suffixes, etc.).
+3. Populate checksum entries alongside URLs.
+
+## Manual review checklist
+
+When a new metadata file is generated:
+
+- `schema_version` should match `schema.json` (currently `v0.1.0`).
+- `package.id` should be unique (format `<book>/<slug>`).
+- `source.urls` must include at least one primary URL; add mirrors/patches as
+  needed.
+- Clear any `status.issues` before promoting the record from `draft`.
+- Run `cargo run --bin metadata_indexer -- --base-dir . index` to regenerate
+  the global index once the draft is finalised.
+
+Refer to `README.md` for usage examples and to `docs/ARCHITECTURE.md` for a
+broader overview of the crate layout.
diff --git a/src/ai/mod.rs b/src/ai/mod.rs
new file mode 100644
index 0000000..fb9baca
--- /dev/null
+++ b/src/ai/mod.rs
@@ -0,0 +1,79 @@
+use std::path::{Path, PathBuf};
+
+use anyhow::Result;
+use serde::Deserialize;
+
+/// Loads assistant persona metadata from `ai/personas.json`.
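+///
+/// Illustrative usage (calling context and error handling elided):
+/// `let personas = load_personas(".")?;`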
+pub fn load_personas(base_dir: impl AsRef<Path>) -> Result<Vec<Persona>> {
+    let path = resolve(base_dir, "personas.json");
+    read_json(path)
+}
+
+/// Loads the tracked task board from `ai/tasks.json`.
+pub fn load_tasks(base_dir: impl AsRef<Path>) -> Result<TaskBoard> {
+    let path = resolve(base_dir, "tasks.json");
+    read_json(path)
+}
+
+/// Loads the current bug ledger from `ai/bugs.json`.
+pub fn load_bugs(base_dir: impl AsRef<Path>) -> Result<Vec<Bug>> {
+    let path = resolve(base_dir, "bugs.json");
+    read_json(path)
+}
+
+fn resolve(base_dir: impl AsRef<Path>, file: &str) -> PathBuf {
+    base_dir.as_ref().join("ai").join(file)
+}
+
+fn read_json<T>(path: PathBuf) -> Result<T>
+where
+    T: for<'de> Deserialize<'de>,
+{
+    let data = std::fs::read_to_string(&path)?;
+    Ok(serde_json::from_str(&data)?)
+}
+
+#[derive(Debug, Deserialize)]
+pub struct Persona {
+    pub id: String,
+    pub name: String,
+    pub description: String,
+    #[serde(default)]
+    pub strengths: Vec<String>,
+    #[serde(default)]
+    pub notes: String,
+}
+
+#[derive(Debug, Deserialize)]
+pub struct TaskBoard {
+    pub generated_at: String,
+    pub unfinished: Vec<Task>,
+    pub solved: Vec<Task>,
+}
+
+#[derive(Debug, Deserialize)]
+pub struct Task {
+    pub id: String,
+    pub title: String,
+    pub description: String,
+    #[serde(default)]
+    pub blocked_on: Vec<String>,
+    #[serde(default)]
+    pub owner: Option<String>,
+    #[serde(default)]
+    pub resolution: Option<String>,
+}
+
+#[derive(Debug, Deserialize)]
+pub struct Bug {
+    pub id: String,
+    pub title: String,
+    pub description: String,
+    pub status: String,
+    #[serde(default)]
+    pub owner: Option<String>,
+    #[serde(default)]
+    pub created_at: Option<String>,
+    #[serde(default)]
+    pub labels: Vec<String>,
+}
diff --git a/src/bin/metadata_indexer.rs b/src/bin/metadata_indexer.rs
new file mode 100644
index 0000000..f058903
--- /dev/null
+++ b/src/bin/metadata_indexer.rs
@@ -0,0 +1,1017 @@
+use std::collections::HashSet;
+use std::fs;
+use std::path::{Path, PathBuf};
+
+use anyhow::{Context, Result};
+use chrono::Utc;
+use clap::{Parser, Subcommand};
+use jsonschema::JSONSchema;
+use regex::Regex;
+use reqwest::{blocking::Client, redirect::Policy};
+use scraper::{ElementRef, Html, Selector};
+use serde_json::{Value, json};
+use sha2::{Digest, Sha256};
+use walkdir::WalkDir;
+
+#[derive(Parser)]
+#[command(
+    name = "metadata-indexer",
+    about = "Validate and regenerate AI metadata index"
+)]
+struct Cli {
+    /// Repository root containing the `ai/metadata` directory
+    #[arg(long, default_value = ".")]
+    base_dir: PathBuf,
+
+    #[command(subcommand)]
+    command: Command,
+}
+
+#[derive(Subcommand)]
+enum Command {
+    /// Validate all package metadata against the JSON schema
+    Validate,
+    /// Validate metadata and regenerate ai/metadata/index.json
+    Index {
+        /// Emit compact JSON instead of pretty printing
+        #[arg(long)]
+        compact: bool,
+    },
+    /// Fetch and draft metadata for a specific package page
+    Harvest {
+        /// Book identifier (lfs, mlfs, blfs, glfs)
+        #[arg(long)]
+        book: String,
+        /// Page path (relative to base) or full URL
+        #[arg(long)]
+        page: String,
+        /// Override base URL for the selected book
+        #[arg(long)]
+        base_url: Option<String>,
+        /// Optional explicit output file path
+        #[arg(long)]
+        output: Option<PathBuf>,
+        /// Do not write to disk, just print JSON to stdout
+        #[arg(long)]
+        dry_run: bool,
+    },
+}
+
+fn main() -> Result<()> {
+    let cli = Cli::parse();
+    let base_dir = cli.base_dir.canonicalize().unwrap_or(cli.base_dir);
+    let metadata_dir = base_dir.join("ai").join("metadata");
+    let schema_path = metadata_dir.join("schema.json");
+    let packages_dir = metadata_dir.join("packages");
+
+    let (_schema_value, schema) = load_schema(&schema_path)?;
+    let packages = scan_packages(&packages_dir)?;
+
+    let mut had_errors = false;
+    for package in &packages {
+        let validation = schema.validate(&package.value);
+        if let Err(errors) = validation {
+            had_errors = true;
+            eprintln!(
+                "Schema validation failed for {}:",
+                package.relative_path.display()
+            );
+            for err in errors {
+                eprintln!("  - {}", err);
+            }
+        }
+
+        if let Some(err) = &package.summary_error {
+            had_errors = true;
+            eprintln!(
+                "Summary extraction failed for {}: {}",
+                package.relative_path.display(),
+                err
+            );
+        }
+    }
+
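+    // Dispatch the subcommand. Validate and Index bail out when any record
+    // failed validation above; Harvest proceeds regardless.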
+    match cli.command {
+        Command::Validate => {
+            if had_errors {
+                anyhow::bail!("metadata validation failed");
+            }
+        }
+        Command::Index { compact } => {
+            if had_errors {
+                anyhow::bail!("metadata validation failed; index not updated");
+            }
+
+            let summaries: Vec<_> = packages
+                .iter()
+                .filter_map(|pkg| pkg.summary.clone())
+                .collect();
+
+            let schema_version = summaries
+                .first()
+                .map(|s| s.schema_version.as_str())
+                .unwrap_or("v0.0.0");
+
+            let generated_at = Utc::now().to_rfc3339();
+            let packages_json: Vec<Value> = summaries
+                .iter()
+                .map(|s| {
+                    json!({
+                        "id": s.id.clone(),
+                        "name": s.name.clone(),
+                        "version": s.version.clone(),
+                        "stage": s.stage.clone(),
+                        "book": s.book.clone(),
+                        "variant": s.variant.clone(),
+                        "status": s.status.clone(),
+                        "path": s.relative_path.clone(),
+                    })
+                })
+                .collect();
+
+            let index = json!({
+                "generated_at": generated_at,
+                "schema_version": schema_version,
+                "packages": packages_json,
+            });
+
+            let index_path = metadata_dir.join("index.json");
+            let serialized = if compact {
+                serde_json::to_string(&index)?
+            } else {
+                serde_json::to_string_pretty(&index)?
+            };
+            fs::write(&index_path, serialized)
+                .with_context(|| format!("writing {}", index_path.display()))?;
+            println!("Updated {}", index_path.display());
+        }
+        Command::Harvest {
+            book,
+            page,
+            base_url,
+            output,
+            dry_run,
+        } => {
+            let book_lower = book.to_lowercase();
+            let harvest = harvest_metadata(&metadata_dir, &book_lower, &page, base_url.as_deref())?;
+
+            if dry_run {
+                println!("{}", serde_json::to_string_pretty(&harvest.value)?);
+            } else {
+                let output_path = output.unwrap_or_else(|| {
+                    metadata_dir
+                        .join("packages")
+                        .join(&book_lower)
+                        .join(format!("{}.json", harvest.slug))
+                });
+                if let Some(parent) = output_path.parent() {
+                    fs::create_dir_all(parent)
+                        .with_context(|| format!("creating directory {}", parent.display()))?;
+                }
+                fs::write(&output_path, serde_json::to_string_pretty(&harvest.value)?)
+                    .with_context(|| format!("writing {}", output_path.display()))?;
+                println!(
+                    "Harvested metadata for {} -> {}",
+                    harvest.package_id,
+                    output_path.display()
+                );
+                println!(
+                    "Run `metadata_indexer --base-dir {} index` to refresh the index.",
+                    base_dir.display()
+                );
+            }
+        }
+    }
+
+    Ok(())
+}
+
+fn load_schema(path: &Path) -> Result<(&'static Value, JSONSchema)> {
+    let data = fs::read_to_string(path)
+        .with_context(|| format!("reading schema file {}", path.display()))?;
+    let value: Value = serde_json::from_str(&data)
+        .with_context(|| format!("parsing JSON schema {}", path.display()))?;
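+    // Leak the parsed schema so the compiled JSONSchema can borrow it for the 'static lifetime.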
+    let leaked = Box::leak(Box::new(value));
+    let schema = JSONSchema::compile(leaked).context("compiling JSON schema")?;
+    Ok((leaked, schema))
+}
+
+fn scan_packages(dir: &Path) -> Result<Vec<PackageRecord>> {
+    if !dir.exists() {
+        return Ok(Vec::new());
+    }
+
+    let mut records = Vec::new();
+    for entry in WalkDir::new(dir)
+        .follow_links(false)
+        .into_iter()
+        .filter_map(|e| e.ok())
+        .filter(|e| e.file_type().is_file())
+        .filter(|e| e.path().extension().and_then(|s| s.to_str()) == Some("json"))
+    {
+        let path = entry.into_path();
+        let data = fs::read_to_string(&path)
+            .with_context(|| format!("reading package metadata {}", path.display()))?;
+        let value: Value = serde_json::from_str(&data)
+            .with_context(|| format!("parsing package JSON {}", path.display()))?;
+
+        let relative_path = path
+            .strip_prefix(dir.parent().unwrap_or(Path::new("")))
+            .unwrap_or(&path)
+            .to_path_buf();
+
+        let (summary, summary_error) = match extract_summary(&value, &relative_path) {
+            Ok(summary) => (Some(summary), None),
+            Err(err) => (None, Some(err)),
+        };
+
+        records.push(PackageRecord {
+            value,
+            relative_path,
+            summary,
+            summary_error,
+        });
+    }
+
+    Ok(records)
+}
+
+#[derive(Clone)]
+struct PackageSummary {
+    schema_version: String,
+    id: String,
+    name: String,
+    version: String,
+    stage: Option<String>,
+    book: String,
+    variant: Option<String>,
+    status: String,
+    relative_path: String,
+}
+
+struct PackageRecord {
+    value: Value,
+    relative_path: PathBuf,
+    summary: Option<PackageSummary>,
+    summary_error: Option<anyhow::Error>,
+}
+
+fn extract_summary(value: &Value, relative_path: &Path) -> Result<PackageSummary> {
+    let schema_version = value
+        .get("schema_version")
+        .and_then(Value::as_str)
+        .context("missing schema_version")?
+        .to_string();
+    let package = value.get("package").context("missing package block")?;
+    let status = value.get("status").context("missing status block")?;
+
+    let id = package
+        .get("id")
+        .and_then(Value::as_str)
+        .context("missing package.id")?
+        .to_string();
+    let name = package
+        .get("name")
+        .and_then(Value::as_str)
+        .context("missing package.name")?
+        .to_string();
+    let version = package
+        .get("version")
+        .and_then(Value::as_str)
+        .context("missing package.version")?
+        .to_string();
+    let book = package
+        .get("book")
+        .and_then(Value::as_str)
+        .context("missing package.book")?
+        .to_string();
+    let stage = package
+        .get("stage")
+        .and_then(Value::as_str)
+        .map(|s| s.to_string());
+    let variant = package
+        .get("variant")
+        .and_then(Value::as_str)
+        .map(|s| s.to_string());
+    let status_state = status
+        .get("state")
+        .and_then(Value::as_str)
+        .context("missing status.state")?
+        .to_string();
+
+    Ok(PackageSummary {
+        schema_version,
+        id,
+        name,
+        version,
+        stage,
+        book,
+        variant,
+        status: status_state,
+        relative_path: relative_path
+            .to_str()
+            .unwrap_or_default()
+            .replace('\\', "/"),
+    })
+}
+
+struct HarvestResult {
+    value: Value,
+    slug: String,
+    package_id: String,
+}
+
+fn harvest_metadata(
+    metadata_dir: &Path,
+    book: &str,
+    page: &str,
+    override_base: Option<&str>,
+) -> Result<HarvestResult> {
+    let page_url = resolve_page_url(book, page, override_base)?;
+    let client = Client::builder()
+        .user_agent("lpkg-metadata-indexer/0.1")
+        .build()?;
+    let response = client
+        .get(&page_url)
+        .send()
+        .with_context(|| format!("fetching {}", page_url))?
+        .error_for_status()
+        .with_context(|| format!("non-success status for {}", page_url))?;
+    let html = response
+        .text()
+        .with_context(|| format!("reading response body from {}", page_url))?;
+
+    let document = Html::parse_document(&html);
+    let harvest = build_metadata_value(metadata_dir, book, &page_url, &document, &html)?;
+    Ok(harvest)
+}
+
+fn resolve_page_url(book: &str, page: &str, override_base: Option<&str>) -> Result<String> {
+    if page.starts_with("http://") || page.starts_with("https://") {
+        return Ok(page.to_string());
+    }
+
+    let base = override_base
+        .map(|s| s.to_string())
+        .or_else(|| default_base_url(book).map(|s| s.to_string()))
+        .context("no base URL available for book")?;
+
+    let base = base.trim_end_matches('/');
+    let mut page_path = page.trim_start_matches('/').to_string();
+    if page_path.is_empty() {
+        page_path = "index.html".to_string();
+    }
+    if !page_path.ends_with(".html") {
+        page_path.push_str(".html");
+    }
+
+    Ok(format!("{}/{}", base, page_path))
+}
+
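+/// Base URLs for each supported book; pass --base-url to target a different release.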
+fn default_base_url(book: &str) -> Option<&'static str> {
+    match book {
+        "lfs" => Some("https://www.linuxfromscratch.org/lfs/view/12.1"),
+        "mlfs" => Some("https://linuxfromscratch.org/~thomas/multilib-m32"),
+        "blfs" => Some("https://www.linuxfromscratch.org/blfs/view/systemd"),
+        "glfs" => Some("https://www.linuxfromscratch.org/glfs/view/glfs"),
+        _ => None,
+    }
+}
+
+fn build_metadata_value(
+    metadata_dir: &Path,
+    book: &str,
+    page_url: &str,
+    document: &Html,
+    html: &str,
+) -> Result<HarvestResult> {
+    let heading_selector = Selector::parse("h1.sect1").unwrap();
+    let heading = document
+        .select(&heading_selector)
+        .next()
+        .context("no 

found")?; + let heading_text = heading + .text() + .map(|t| t.replace('\u{00a0}', " ")) + .collect::>() + .join(" "); + let heading_clean = normalize_whitespace(&heading_text); + let heading_re = Regex::new(r"^(?P
\d+\.\d+)\.\s+(?P.+)$")?; + let caps = heading_re + .captures(&heading_clean) + .with_context(|| format!("unable to parse heading '{}'", heading_clean))?; + let section = caps["section"].to_string(); + let title = caps["title"].trim().to_string(); + + let (name, version, variant) = split_name_variant(&title); + let chapter_num: u32 = section + .split('.') + .next() + .and_then(|s| s.parse().ok()) + .unwrap_or(0); + let stage = stage_for_chapter(chapter_num).map(|s| s.to_string()); + + let slug_base = slugify(&name); + let slug = variant + .as_ref() + .map(|v| format!("{}-{}", slug_base, slugify(v))) + .unwrap_or_else(|| slug_base.clone()); + let package_id = format!("{}/{}", book, slug); + let package_id_for_json = package_id.clone(); + + let anchor_url = heading + .value() + .id() + .map(|id| format!("{}#{}", page_url, id)) + .or_else(|| locate_child_anchor(&heading).map(|id| format!("{}#{}", page_url, id))) + .or_else(|| { + let anchor_selector = Selector::parse("a[id]").unwrap(); + document + .select(&anchor_selector) + .filter_map(|a| a.value().attr("id")) + .find(|id| id.contains(&slug_base)) + .map(|id| format!("{}#{}", page_url, id)) + }) + .or_else(|| { + let escaped = regex::escape(&slug_base); + let pattern = format!(r#"id=\"([^\"]*{}[^\"]*)\""#, escaped); + Regex::new(&pattern) + .ok() + .and_then(|re| re.captures(html)) + .and_then(|caps| caps.get(1)) + .map(|m| format!("{}#{}", page_url, m.as_str())) + }); + + let mut source_urls = collect_tarball_urls(page_url, document); + let mut archive_name = infer_archive_from_commands(document).or_else(|| { + source_urls.iter().find_map(|entry| { + entry + .url + .path_segments() + .and_then(|mut iter| iter.next_back()) + .map(|s| s.to_string()) + }) + }); + + if source_urls.is_empty() { + match fallback_urls_from_wget(metadata_dir, book, &slug_base, &version) { + Ok(fallback) => { + if !fallback.is_empty() { + eprintln!( + "info: using {} URL(s) from wget-list for {} {}", + fallback.len(), + slug_base, + version + ); + source_urls = fallback; + } + } + Err(err) => { + eprintln!( + "warning: failed to consult wget-list for {} {}: {}", + slug_base, version, err + ); + } + } + } + + if archive_name.is_none() { + archive_name = source_urls.iter().find_map(|entry| { + entry + .url + .path_segments() + .and_then(|mut iter| iter.next_back()) + .map(|s| s.to_string()) + }); + if archive_name.is_none() { + eprintln!( + "warning: unable to infer archive name from source URLs for {} {}", + slug_base, version + ); + } + } + + let (sbu, disk) = extract_artifacts(document); + let build_steps = extract_build_steps(document); + + let mut issues = Vec::new(); + if anchor_url.is_none() { + issues.push("Could not locate anchor id for primary heading".to_string()); + } + if source_urls.is_empty() { + issues.push("No source URLs with archive extensions detected".to_string()); + } + if build_steps.is_empty() { + issues.push("No <pre class=\"userinput\"> blocks found for build commands".to_string()); + } + + let source_urls_json: Vec<Value> = source_urls + .iter() + .map(|entry| { + json!({ + "url": entry.url.as_str(), + "kind": entry.kind, + }) + }) + .collect(); + + let checksum_entries = match resolve_checksums(metadata_dir, book, archive_name.as_deref()) { + Ok(values) => values, + Err(err) => { + eprintln!( + "warning: failed to resolve checksums for {} {}: {}", + slug_base, version, err + ); + Vec::new() + } + }; + + let build_json: Vec<Value> = build_steps + .iter() + .map(|step| { + json!({ + "phase": step.phase, + "commands": step.commands, + "cwd": 
step.cwd, + "requires_root": step.requires_root, + "notes": step.notes, + }) + }) + .collect(); + + let body_selector = Selector::parse("body").unwrap(); + let book_release = document + .select(&body_selector) + .next() + .and_then(|body| body.value().id()) + .map(|id| id.to_string()) + .unwrap_or_default(); + + let retrieved_at = Utc::now().to_rfc3339(); + let content_hash = hex::encode(Sha256::digest(html.as_bytes())); + + let anchors_value = match anchor_url { + Some(ref href) => json!({ "section": href }), + None => json!({}), + }; + + let status_state = "draft"; + + let package_json = json!({ + "schema_version": "v0.1.0", + "package": { + "id": package_id_for_json, + "name": name, + "upstream": Option::<String>::None, + "version": version, + "book": book, + "chapter": chapter_num, + "section": section, + "stage": stage, + "variant": variant, + "anchors": anchors_value, + }, + "source": { + "urls": source_urls_json, + "archive": archive_name, + "checksums": checksum_entries, + }, + "artifacts": { + "sbu": sbu, + "disk": disk, + "install_prefix": Option::<String>::None, + }, + "dependencies": { + "build": Vec::<Value>::new(), + "runtime": Vec::<Value>::new(), + }, + "environment": { + "variables": Vec::<Value>::new(), + "users": Vec::<Value>::new(), + }, + "build": build_json, + "optimizations": { + "enable_lto": true, + "enable_pgo": true, + "cflags": ["-O3", "-flto"], + "ldflags": ["-flto"], + "profdata": Option::<String>::None, + }, + "provenance": { + "book_release": book_release, + "page_url": page_url, + "retrieved_at": retrieved_at, + "content_hash": content_hash, + }, + "status": { + "state": status_state, + "issues": issues, + } + }); + + Ok(HarvestResult { + value: package_json, + slug, + package_id, + }) +} + +fn normalize_whitespace(input: &str) -> String { + let mut result = String::with_capacity(input.len()); + let mut prev_space = false; + for ch in input.chars() { + if ch.is_whitespace() { + if !prev_space { + result.push(' '); + prev_space = true; + } + } else { + prev_space = false; + result.push(ch); + } + } + result.trim().to_string() +} + +fn slugify(input: &str) -> String { + let mut result = String::new(); + let mut prev_dash = false; + for ch in input.chars() { + let normalized = match ch { + 'A'..='Z' => ch.to_ascii_lowercase(), + 'a'..='z' | '0'..='9' => ch, + _ => '-', + }; + if normalized == '-' { + if !prev_dash { + result.push('-'); + prev_dash = true; + } + } else { + prev_dash = false; + result.push(normalized); + } + } + result.trim_matches('-').to_string() +} + +fn split_name_variant(title: &str) -> (String, String, Option<String>) { + let mut base = title.trim().to_string(); + let mut variant = None; + if let Some(idx) = base.rfind(" - ") { + variant = Some(base[idx + 3..].trim().to_string()); + base = base[..idx].trim().to_string(); + } + + let bytes = base.as_bytes(); + for idx in (0..bytes.len()).rev() { + if bytes[idx] == b'-' { + if let Some(next) = bytes.get(idx + 1) { + if next.is_ascii_digit() { + let name = base[..idx].trim(); + let version = base[idx + 1..].trim(); + if !name.is_empty() && !version.is_empty() { + return (name.to_string(), version.to_string(), variant); + } + } + } + } + } + + (base, String::from("unknown"), variant) +} + +fn stage_for_chapter(chapter: u32) -> Option<&'static str> { + match chapter { + 5 => Some("cross-toolchain"), + 6 | 7 => Some("temporary-tools"), + 8 => Some("system"), + 9 => Some("system-configuration"), + 10 => Some("system-finalization"), + _ => None, + } +} + +struct SourceUrlEntry { + url: url::Url, + 
kind: &'static str, +} + +enum ManifestKind { + WgetList, + Md5Sums, +} + +impl ManifestKind { + fn filename(&self) -> &'static str { + match self { + ManifestKind::WgetList => "wget-list.txt", + ManifestKind::Md5Sums => "md5sums.txt", + } + } +} + +fn collect_tarball_urls(page_url: &str, document: &Html) -> Vec<SourceUrlEntry> { + let base = url::Url::parse(page_url).ok(); + let link_selector = Selector::parse("a").unwrap(); + let mut seen = HashSet::new(); + let mut results = Vec::new(); + + for link in document.select(&link_selector) { + if let Some(href) = link.value().attr("href") { + if let Some(kind) = classify_artifact_url(href) { + let resolved = match (&base, url::Url::parse(href)) { + (_, Ok(url)) => url, + (Some(base_url), Err(_)) => match base_url.join(href) { + Ok(url) => url, + Err(_) => continue, + }, + _ => continue, + }; + if seen.insert(resolved.clone()) { + results.push(SourceUrlEntry { + url: resolved, + kind, + }); + } + } + } + } + + results +} + +fn classify_artifact_url(href: &str) -> Option<&'static str> { + let lower = href.to_lowercase(); + if lower.ends_with(".tar") + || lower.ends_with(".tar.gz") + || lower.ends_with(".tar.bz2") + || lower.ends_with(".tar.xz") + || lower.ends_with(".tgz") + || lower.ends_with(".zip") + { + Some("primary") + } else if lower.ends_with(".patch") { + Some("patch") + } else if lower.ends_with(".sig") || lower.ends_with(".asc") { + Some("signature") + } else { + None + } +} + +fn fallback_urls_from_wget( + metadata_dir: &Path, + book: &str, + slug: &str, + version: &str, +) -> Result<Vec<SourceUrlEntry>> { + let manifest = load_jhalfs_manifest(metadata_dir, book, ManifestKind::WgetList)?; + let needle = format!("{}-{}", slug.replace('_', "-"), version); + eprintln!("debug: searching wget-list for '{}'", needle); + let mut entries = Vec::new(); + for line in manifest.lines() { + if line.contains(&needle) { + if let Ok(url) = url::Url::parse(line.trim()) { + eprintln!("info: matched wget URL {}", url); + entries.push(SourceUrlEntry { + url, + kind: "primary", + }); + } else { + eprintln!( + "warning: unable to parse URL from wget-list line: {}", + line.trim() + ); + } + } + } + if entries.is_empty() { + eprintln!("warning: no wget-list entries matched '{}'", needle); + } + Ok(entries) +} + +fn resolve_checksums( + metadata_dir: &Path, + book: &str, + archive_name: Option<&str>, +) -> Result<Vec<Value>> { + let mut checksums = Vec::new(); + let Some(archive) = archive_name else { + return Ok(checksums); + }; + + let manifest = load_jhalfs_manifest(metadata_dir, book, ManifestKind::Md5Sums)?; + for line in manifest.lines() { + let trimmed = line.trim(); + if trimmed.is_empty() { + continue; + } + let mut parts = trimmed.split_whitespace(); + let Some(hash) = parts.next() else { continue }; + let Some(file) = parts.next() else { continue }; + if file == archive { + checksums.push(json!({ + "alg": "md5", + "value": hash.to_lowercase(), + })); + break; + } + } + + Ok(checksums) +} + +fn load_jhalfs_manifest(metadata_dir: &Path, book: &str, kind: ManifestKind) -> Result<String> { + let cache_dir = metadata_dir.join("cache"); + fs::create_dir_all(&cache_dir) + .with_context(|| format!("creating cache directory {}", cache_dir.display()))?; + + let cache_path = cache_dir.join(format!("{}-{}", book, kind.filename())); + if cache_path.exists() { + return fs::read_to_string(&cache_path) + .with_context(|| format!("reading cached manifest {}", cache_path.display())); + } + + let url = manifest_url(book, &kind) + .with_context(|| format!("no 
manifest URL configured for book '{}'", book))?; + + let client = Client::builder().redirect(Policy::limited(5)).build()?; + let body = client + .get(url) + .send() + .with_context(|| format!("fetching {}", url))? + .error_for_status() + .with_context(|| format!("request failed for {}", url))? + .text() + .with_context(|| format!("reading response body from {}", url))?; + + fs::write(&cache_path, &body) + .with_context(|| format!("caching manifest {}", cache_path.display()))?; + + Ok(body) +} + +fn manifest_url(book: &str, kind: &ManifestKind) -> Option<&'static str> { + match (book, kind) { + ("mlfs", ManifestKind::WgetList) => { + Some("https://www.linuxfromscratch.org/~thomas/multilib-m32/wget-list-sysv") + } + ("mlfs", ManifestKind::Md5Sums) => { + Some("https://www.linuxfromscratch.org/~thomas/multilib-m32/md5sums") + } + ("lfs", ManifestKind::WgetList) => { + Some("https://www.linuxfromscratch.org/lfs/view/12.1/wget-list") + } + ("lfs", ManifestKind::Md5Sums) => { + Some("https://www.linuxfromscratch.org/lfs/view/12.1/md5sums") + } + ("blfs", ManifestKind::WgetList) => { + Some("https://anduin.linuxfromscratch.org/BLFS/view/systemd/wget-list") + } + ("blfs", ManifestKind::Md5Sums) => { + Some("https://anduin.linuxfromscratch.org/BLFS/view/systemd/md5sums") + } + ("glfs", ManifestKind::WgetList) => { + Some("https://www.linuxfromscratch.org/glfs/view/glfs/wget-list") + } + ("glfs", ManifestKind::Md5Sums) => { + Some("https://www.linuxfromscratch.org/glfs/view/glfs/md5sums") + } + _ => None, + } +} + +fn locate_child_anchor(heading: &ElementRef) -> Option<String> { + let mut current = heading.first_child(); + while let Some(node) = current { + if let Some(element) = ElementRef::wrap(node) { + if let Some(id) = element + .value() + .attr("id") + .or_else(|| element.value().attr("name")) + { + return Some(id.to_string()); + } + } + current = node.next_sibling(); + } + None +} + +fn infer_archive_from_commands(document: &Html) -> Option<String> { + let pre_selector = Selector::parse("pre.userinput").unwrap(); + for pre in document.select(&pre_selector) { + let text = pre.text().collect::<Vec<_>>().join("\n"); + for line in text.lines() { + if let Some(start) = line.find("tar -xf") { + let args = line[start + 7..].trim(); + let parts: Vec<&str> = args.split_whitespace().collect(); + if let Some(archive) = parts.get(0) { + let cleaned = archive.trim_matches(['"', '\'', ','].as_ref()); + if cleaned.ends_with(".tar") + || cleaned.contains(".tar.") + || cleaned.ends_with(".tgz") + || cleaned.ends_with(".zip") + { + return Some(cleaned.trim_start_matches("../").to_string()); + } + } + } + } + } + None +} + +fn extract_artifacts(document: &Html) -> (Option<f64>, Option<i64>) { + let seg_selector = Selector::parse("div.segmentedlist div.seg").unwrap(); + let title_selector = Selector::parse("strong.segtitle").unwrap(); + let body_selector = Selector::parse("span.segbody").unwrap(); + let mut sbu = None; + let mut disk = None; + + for seg in document.select(&seg_selector) { + let title = seg + .select(&title_selector) + .next() + .map(|n| normalize_whitespace(&n.text().collect::<Vec<_>>().join(""))); + let body = seg + .select(&body_selector) + .next() + .map(|n| normalize_whitespace(&n.text().collect::<Vec<_>>().join(""))); + + if let (Some(title), Some(body)) = (title, body) { + if title.contains("Approximate build time") { + if let Some(value) = parse_numeric(&body) { + sbu = Some(value); + } + } else if title.contains("Required disk space") { + if let Some(value) = parse_numeric(&body) { + 
disk = Some(value as i64); + } + } + } + } + + (sbu, disk) +} + +fn parse_numeric(input: &str) -> Option<f64> { + let re = Regex::new(r"([0-9]+(?:\\.[0-9]+)?)").ok()?; + re.captures(input) + .and_then(|caps| caps.get(1)) + .and_then(|m| m.as_str().parse().ok()) +} + +struct BuildStep { + phase: &'static str, + commands: Vec<String>, + cwd: Option<String>, + requires_root: bool, + notes: Option<String>, +} + +fn extract_build_steps(document: &Html) -> Vec<BuildStep> { + let pre_selector = Selector::parse("pre.userinput").unwrap(); + let mut steps = Vec::new(); + + for pre in document.select(&pre_selector) { + let code = pre.text().collect::<Vec<_>>().join("\n"); + let commands: Vec<String> = code + .lines() + .map(|line| line.trim().to_string()) + .filter(|line| !line.is_empty()) + .collect(); + + if commands.is_empty() { + continue; + } + + let phase = classify_phase(&commands); + steps.push(BuildStep { + phase, + commands, + cwd: None, + requires_root: false, + notes: None, + }); + } + + steps +} + +fn classify_phase(commands: &[String]) -> &'static str { + let joined = commands.join("\n").to_lowercase(); + if joined.contains("make install") { + "install" + } else if joined.contains("make -k check") || joined.contains("make check") { + "test" + } else if joined.contains("configure") { + "configure" + } else if joined.contains("tar -xf") || joined.contains("mkdir ") { + "setup" + } else { + "build" + } +} diff --git a/src/db/mod.rs b/src/db/mod.rs new file mode 100644 index 0000000..9cc40b9 --- /dev/null +++ b/src/db/mod.rs @@ -0,0 +1,107 @@ +pub mod models; +pub mod schema; + +use std::env; + +use anyhow::{Context, Result}; +use diesel::prelude::*; +use diesel::r2d2::{self, ConnectionManager}; +use diesel::sqlite::SqliteConnection; + +use crate::pkgs::package::PackageDefinition; + +use self::models::{NewPackage, Package}; +use self::schema::packages::dsl as packages_dsl; + +pub type Pool = r2d2::Pool<ConnectionManager<SqliteConnection>>; +pub type Connection = r2d2::PooledConnection<ConnectionManager<SqliteConnection>>; + +const DEFAULT_DB_URL: &str = "lpkg.db"; + +/// Resolve the database URL from `LPKG_DATABASE_URL` or fall back to `lpkg.db` in the CWD. +pub fn database_url() -> String { + env::var("LPKG_DATABASE_URL").unwrap_or_else(|_| DEFAULT_DB_URL.to_string()) +} + +/// Build an r2d2 connection pool and ensure the schema exists. +pub fn establish_pool() -> Result<Pool> { + let manager = ConnectionManager::<SqliteConnection>::new(database_url()); + let pool = Pool::builder() + .build(manager) + .context("creating Diesel connection pool")?; + + { + let mut conn = pool + .get() + .context("establishing initial database connection")?; + initialize(&mut conn)?; + } + + Ok(pool) +} + +fn initialize(conn: &mut SqliteConnection) -> Result<()> { + diesel::sql_query( + "CREATE TABLE IF NOT EXISTS packages ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + version TEXT NOT NULL, + source TEXT, + md5 TEXT, + configure_args TEXT, + build_commands TEXT, + install_commands TEXT, + dependencies TEXT, + enable_lto BOOLEAN NOT NULL DEFAULT 1, + enable_pgo BOOLEAN NOT NULL DEFAULT 1, + cflags TEXT, + ldflags TEXT, + profdata TEXT + )", + ) + .execute(conn) + .context("creating packages table")?; + + diesel::sql_query( + "CREATE UNIQUE INDEX IF NOT EXISTS idx_packages_name_version ON packages(name, version)", + ) + .execute(conn) + .context("creating packages unique index")?; + + Ok(()) +} + +/// Insert or update a package definition in the database. 
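+/// The ON CONFLICT target relies on the UNIQUE idx_packages_name_version index created in initialize().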
+pub fn upsert_package(conn: &mut SqliteConnection, definition: &PackageDefinition) -> Result<()> { + let record = NewPackage::try_from(definition)?; + + diesel::insert_into(packages_dsl::packages) + .values(&record) + .on_conflict((packages_dsl::name, packages_dsl::version)) + .do_update() + .set(&record) + .execute(conn) + .context("upserting package record")?; + + Ok(()) +} + +/// Convenience helper to upsert via pool and return the persisted definition. +pub fn upsert_package_via_pool(pool: &Pool, definition: &PackageDefinition) -> Result<()> { + let mut conn = pool.get().context("acquiring database connection")?; + upsert_package(&mut conn, definition) +} + +/// Load all packages from the database. +pub fn load_packages(conn: &mut SqliteConnection) -> Result<Vec<Package>> { + packages_dsl::packages + .order((packages_dsl::name, packages_dsl::version)) + .load::<Package>(conn) + .context("loading packages from database") +} + +/// Load packages using the shared pool. +pub fn load_packages_via_pool(pool: &Pool) -> Result<Vec<Package>> { + let mut conn = pool.get().context("acquiring database connection")?; + load_packages(&mut conn) +} diff --git a/src/db/models.rs b/src/db/models.rs new file mode 100644 index 0000000..cf48092 --- /dev/null +++ b/src/db/models.rs @@ -0,0 +1,104 @@ +use anyhow::{Context, Result}; +use diesel::prelude::*; +use serde::{Deserialize, Serialize}; + +use crate::pkgs::package::PackageDefinition; + +use super::schema::packages; + +#[derive(Debug, Queryable, Serialize, Deserialize)] +pub struct Package { + pub id: i32, + pub name: String, + pub version: String, + pub source: Option<String>, + pub md5: Option<String>, + pub configure_args: Option<String>, + pub build_commands: Option<String>, + pub install_commands: Option<String>, + pub dependencies: Option<String>, + pub enable_lto: bool, + pub enable_pgo: bool, + pub cflags: Option<String>, + pub ldflags: Option<String>, + pub profdata: Option<String>, +} + +impl Package { + pub fn into_definition(self) -> Result<PackageDefinition> { + Ok(PackageDefinition { + name: self.name, + version: self.version, + source: self.source, + md5: self.md5, + configure_args: parse_vec(self.configure_args)?, + build_commands: parse_vec(self.build_commands)?, + install_commands: parse_vec(self.install_commands)?, + dependencies: parse_vec(self.dependencies)?, + optimizations: crate::pkgs::package::OptimizationSettings { + enable_lto: self.enable_lto, + enable_pgo: self.enable_pgo, + cflags: parse_vec(self.cflags)?, + ldflags: parse_vec(self.ldflags)?, + profdata: self.profdata, + }, + }) + } +} + +#[derive(Debug, Insertable, AsChangeset)] +#[diesel(table_name = packages)] +pub struct NewPackage { + pub name: String, + pub version: String, + pub source: Option<String>, + pub md5: Option<String>, + pub configure_args: Option<String>, + pub build_commands: Option<String>, + pub install_commands: Option<String>, + pub dependencies: Option<String>, + pub enable_lto: bool, + pub enable_pgo: bool, + pub cflags: Option<String>, + pub ldflags: Option<String>, + pub profdata: Option<String>, +} + +impl TryFrom<&PackageDefinition> for NewPackage { + type Error = anyhow::Error; + + fn try_from(value: &PackageDefinition) -> Result<Self> { + Ok(Self { + name: value.name.clone(), + version: value.version.clone(), + source: value.source.clone(), + md5: value.md5.clone(), + configure_args: serialize_vec(&value.configure_args)?, + build_commands: serialize_vec(&value.build_commands)?, + install_commands: serialize_vec(&value.install_commands)?, + 
dependencies: serialize_vec(&value.dependencies)?, + enable_lto: value.optimizations.enable_lto, + enable_pgo: value.optimizations.enable_pgo, + cflags: serialize_vec(&value.optimizations.cflags)?, + ldflags: serialize_vec(&value.optimizations.ldflags)?, + profdata: value.optimizations.profdata.clone(), + }) + } +} + +fn serialize_vec(values: &[String]) -> Result<Option<String>> { + if values.is_empty() { + Ok(None) + } else { + serde_json::to_string(values) + .map(Some) + .context("serializing vector to JSON") + } +} + +fn parse_vec(raw: Option<String>) -> Result<Vec<String>> { + match raw { + Some(data) => serde_json::from_str(&data).context("parsing JSON vector"), + None => Ok(Vec::new()), + } +} diff --git a/src/db/schema.rs b/src/db/schema.rs new file mode 100644 index 0000000..9ecfa1e --- /dev/null +++ b/src/db/schema.rs @@ -0,0 +1,19 @@ +// Diesel schema for package storage. Maintained manually to avoid build-script dependency. +diesel::table! { + packages (id) { + id -> Integer, + name -> Text, + version -> Text, + source -> Nullable<Text>, + md5 -> Nullable<Text>, + configure_args -> Nullable<Text>, + build_commands -> Nullable<Text>, + install_commands -> Nullable<Text>, + dependencies -> Nullable<Text>, + enable_lto -> Bool, + enable_pgo -> Bool, + cflags -> Nullable<Text>, + ldflags -> Nullable<Text>, + profdata -> Nullable<Text>, + } +} diff --git a/src/ingest/blfs.rs b/src/ingest/blfs.rs new file mode 100644 index 0000000..850b46d --- /dev/null +++ b/src/ingest/blfs.rs @@ -0,0 +1,113 @@ +use anyhow::{Context, Result}; +use regex::Regex; +use reqwest::blocking::Client; +use scraper::{Html, Selector}; + +use super::{BookPackage, FetchOptions}; +use crate::ingest::lfs::split_name_version; + +pub fn fetch_book(options: &FetchOptions) -> Result<Vec<BookPackage>> { + let base = options.base_url.trim_end_matches('/'); + let url = format!("{base}/book.html"); + + let client = Client::builder().build().context("building HTTP client")?; + let body = client + .get(&url) + .send() + .with_context(|| format!("fetching {}", url))? + .error_for_status() + .with_context(|| format!("request failed for {}", url))? + .text() + .context("reading response body")?; + + parse_book_html(options, &url, &body) +} + +pub fn parse_book_html( + options: &FetchOptions, + book_url: &str, + body: &str, +) -> Result<Vec<BookPackage>> { + let document = Html::parse_document(body); + let selector = Selector::parse("h1.sect1").unwrap(); + let numbering_re = + Regex::new(r"^(?P<chapter>\d+)\.(?P<section>\d+)\.\s+(?P<title>.+)$").unwrap(); + + let mut results = Vec::new(); + + for heading in document.select(&selector) { + let text = heading + .text() + .map(str::trim) + .filter(|s| !s.is_empty()) + .collect::<Vec<_>>() + .join(" ") + .replace('\n', " ") + .trim() + .to_string(); + if text.is_empty() { + continue; + } + + // BLFS headings often look like "33.2. Bzip2" or "33.2. 
Bzip2-1.0.8" + let caps = match numbering_re.captures(&text) { + Some(caps) => caps, + None => continue, + }; + + let chapter_num: u32 = caps["chapter"].parse().unwrap_or(0); + let section_num: u32 = caps["section"].parse().unwrap_or(0); + let title = caps["title"].trim(); + + let (name, version, variant) = match split_name_version(title) { + Some(parts) => parts, + None => continue, + }; + + let href = heading.value().id().map(|id| { + let mut base = book_url.to_string(); + if !base.contains('#') { + base.push('#'); + } + format!("{}{}", base, id) + }); + + let section_label = Some(format!("{}.{}", chapter_num, section_num)); + + results.push(BookPackage { + book: options.book, + chapter: Some(chapter_num), + section: section_label, + name, + version: Some(version), + href, + md5: None, + stage: None, + variant, + notes: None, + }); + } + + Ok(results) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ingest::BookKind; + + #[test] + fn parse_blfs_sample() { + let html = r#" + <html><body> + <h1 class=\"sect1\" id=\"ch33-bzip2\">33.2. Bzip2-1.0.8</h1> + <h1 class=\"sect1\" id=\"ch33-about\">33.1. Introduction</h1> + </body></html> + "#; + let opts = FetchOptions::new("https://example.invalid/blfs", BookKind::Blfs); + let items = parse_book_html(&opts, "https://example.invalid/blfs/book.html", html).unwrap(); + assert_eq!(items.len(), 1); + assert_eq!(items[0].name, "Bzip2"); + assert_eq!(items[0].version.as_deref(), Some("1.0.8")); + } +} diff --git a/src/ingest/glfs.rs b/src/ingest/glfs.rs new file mode 100644 index 0000000..3fb7dff --- /dev/null +++ b/src/ingest/glfs.rs @@ -0,0 +1,109 @@ +use anyhow::{Context, Result}; +use regex::Regex; +use reqwest::blocking::Client; +use scraper::{Html, Selector}; + +use super::{BookPackage, FetchOptions}; +use crate::ingest::lfs::split_name_version; + +pub fn fetch_book(options: &FetchOptions) -> Result<Vec<BookPackage>> { + let base = options.base_url.trim_end_matches('/'); + let url = format!("{base}/book.html"); + + let client = Client::builder().build().context("building HTTP client")?; + let body = client + .get(&url) + .send() + .with_context(|| format!("fetching {}", url))? + .error_for_status() + .with_context(|| format!("request failed for {}", url))? 
+ .text() + .context("reading response body")?; + + parse_book_html(options, &url, &body) +} + +pub fn parse_book_html( + options: &FetchOptions, + book_url: &str, + body: &str, +) -> Result<Vec<BookPackage>> { + let document = Html::parse_document(body); + let selector = Selector::parse("h1.sect1").unwrap(); + let numbering_re = + Regex::new(r"^(?P<chapter>\d+)\.(?P<section>\d+)\.\s+(?P<title>.+)$").unwrap(); + + let mut results = Vec::new(); + + for heading in document.select(&selector) { + let text = heading + .text() + .map(str::trim) + .filter(|s| !s.is_empty()) + .collect::<Vec<_>>() + .join(" ") + .replace('\n', " ") + .trim() + .to_string(); + if text.is_empty() { + continue; + } + + let caps = match numbering_re.captures(&text) { + Some(caps) => caps, + None => continue, + }; + + let chapter_num: u32 = caps["chapter"].parse().unwrap_or(0); + let section_num: u32 = caps["section"].parse().unwrap_or(0); + let title = caps["title"].trim(); + + let (name, version, variant) = match split_name_version(title) { + Some(parts) => parts, + None => continue, + }; + + let href = heading.value().id().map(|id| { + let mut base = book_url.to_string(); + if !base.contains('#') { + base.push('#'); + } + format!("{}{}", base, id) + }); + + results.push(BookPackage { + book: options.book, + chapter: Some(chapter_num), + section: Some(format!("{}.{}", chapter_num, section_num)), + name, + version: Some(version), + href, + md5: None, + stage: None, + variant, + notes: None, + }); + } + + Ok(results) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ingest::BookKind; + + #[test] + fn parse_glfs_sample() { + let html = r#" + <html><body> + <h1 class=\"sect1\" id=\"ch12-coreutils\">12.4. Coreutils-9.8</h1> + </body></html> + "#; + let opts = FetchOptions::new("https://example.invalid/glfs", BookKind::Glfs); + let items = parse_book_html(&opts, "https://example.invalid/glfs/book.html", html).unwrap(); + assert_eq!(items.len(), 1); + assert_eq!(items[0].name, "Coreutils"); + assert_eq!(items[0].version.as_deref(), Some("9.8")); + } +} diff --git a/src/ingest/lfs.rs b/src/ingest/lfs.rs new file mode 100644 index 0000000..a9d2f37 --- /dev/null +++ b/src/ingest/lfs.rs @@ -0,0 +1,169 @@ +use anyhow::{Context, Result}; +use regex::Regex; +use reqwest::blocking::Client; +use scraper::{Html, Selector}; + +use super::{BookPackage, FetchOptions}; + +pub fn fetch_book(options: &FetchOptions) -> Result<Vec<BookPackage>> { + let base = options.base_url.trim_end_matches('/'); + let url = format!("{base}/book.html"); + + let client = Client::builder().build().context("building HTTP client")?; + let body = client + .get(&url) + .send() + .with_context(|| format!("fetching {}", url))? + .error_for_status() + .with_context(|| format!("request failed for {}", url))? 
+ .text() + .context("reading response body")?; + + parse_book_html(options, &url, &body) +} + +pub fn parse_book_html( + options: &FetchOptions, + book_url: &str, + body: &str, +) -> Result<Vec<BookPackage>> { + let document = Html::parse_document(body); + let selector = Selector::parse("h1.sect1").unwrap(); + + let numbering_re = + Regex::new(r"^(?P<chapter>\d+)\.(?P<section>\d+)\.\s+(?P<title>.+)$").unwrap(); + + let mut results = Vec::new(); + + for heading in document.select(&selector) { + let text = heading + .text() + .map(str::trim) + .filter(|s| !s.is_empty()) + .collect::<Vec<_>>() + .join(" ") + .replace('\n', " ") + .trim() + .to_string(); + if text.is_empty() { + continue; + } + + let caps = match numbering_re.captures(&text) { + Some(caps) => caps, + None => continue, + }; + let chapter_num: u32 = caps["chapter"].parse().unwrap_or(0); + let section_num: u32 = caps["section"].parse().unwrap_or(0); + let title = caps["title"].trim(); + + let (name, version, variant) = match split_name_version(title) { + Some(parts) => parts, + None => continue, + }; + + let stage = stage_for_chapter(chapter_num).map(|s| s.to_string()); + let identifier = format!("{chapter_num}.{section_num:02}"); + + let href = heading.value().id().map(|id| { + let mut base = book_url.to_string(); + if !base.contains('#') { + base.push('#'); + } + format!("{}{}", base, id) + }); + + results.push(BookPackage { + book: options.book, + chapter: Some(chapter_num), + section: Some(identifier), + name, + version: Some(version), + href, + md5: None, + stage, + variant, + notes: None, + }); + } + + Ok(results) +} + +pub(crate) fn split_name_version(title: &str) -> Option<(String, String, Option<String>)> { + // Find the last '-' whose next character is a digit (start of version) + let bytes = title.as_bytes(); + for idx in (0..bytes.len()).rev() { + if bytes[idx] == b'-' { + if let Some(next) = bytes.get(idx + 1) { + if next.is_ascii_digit() { + let name = title[..idx].trim(); + let mut remainder = title[idx + 1..].trim(); + if name.is_empty() || remainder.is_empty() { + return None; + } + + let mut variant = None; + if let Some(pos) = remainder.find(" - ") { + variant = Some(remainder[pos + 3..].trim().to_string()); + remainder = remainder[..pos].trim(); + } else if let Some(pos) = remainder.find(" (") { + let note = remainder[pos + 1..].trim_end_matches(')').trim(); + variant = Some(note.to_string()); + remainder = remainder[..pos].trim(); + } + + return Some((name.to_string(), remainder.to_string(), variant)); + } + } + } + } + None +} + +fn stage_for_chapter(chapter: u32) -> Option<&'static str> { + match chapter { + 5 => Some("cross-toolchain"), + 6 | 7 => Some("temporary-tools"), + 8 => Some("system"), + 9 => Some("system-configuration"), + 10 => Some("system-finalization"), + _ => None, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::ingest::BookKind; + use scraper::{Html, Selector}; + + #[test] + fn parse_sample_headings() { + let html = r#" + <html><body> + <h1 class=\"sect1\" id=\"ch05-binutils-pass1\">5.5. Binutils-2.45 - Pass 1</h1> + <h1 class=\"sect1\" id=\"ch05-gcc-pass1\">5.6. GCC-15.2.0 - Pass 1</h1> + <h1 class=\"sect1\" id=\"ch09-bootscripts\">9.3. LFS-Bootscripts-20250827</h1> + <h1 class=\"sect1\" id=\"ch08-xml-parser\">8.41. 
XML::Parser-2.47</h1> + </body></html> + "#; + let opts = FetchOptions::new("https://example.invalid/lfs", BookKind::Mlfs); + let document = Html::parse_document(html); + let selector = Selector::parse("h1.sect1").unwrap(); + assert!( + document.select(&selector).next().is_some(), + "sample headings selector returned no nodes" + ); + let packages = + parse_book_html(&opts, "https://example.invalid/lfs/book.html", html).unwrap(); + assert_eq!(packages.len(), 4); + assert_eq!(packages[0].name, "Binutils"); + assert_eq!(packages[0].version.as_deref(), Some("2.45")); + assert_eq!(packages[0].variant.as_deref(), Some("Pass 1")); + assert_eq!(packages[0].stage.as_deref(), Some("cross-toolchain")); + assert_eq!(packages[1].variant.as_deref(), Some("Pass 1")); + assert_eq!(packages[2].variant, None); + assert_eq!(packages[3].name, "XML::Parser"); + } +} diff --git a/src/ingest/mod.rs b/src/ingest/mod.rs new file mode 100644 index 0000000..cb88b9e --- /dev/null +++ b/src/ingest/mod.rs @@ -0,0 +1,67 @@ +pub mod blfs; +pub mod glfs; +pub mod lfs; + +use std::fmt; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum BookKind { + Lfs, + Mlfs, + Blfs, + Glfs, +} + +impl fmt::Display for BookKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let label = match self { + BookKind::Lfs => "lfs", + BookKind::Mlfs => "mlfs", + BookKind::Blfs => "blfs", + BookKind::Glfs => "glfs", + }; + f.write_str(label) + } +} + +#[derive(Debug, Clone)] +pub struct BookPackage { + pub book: BookKind, + pub chapter: Option<u32>, + pub section: Option<String>, + pub name: String, + pub version: Option<String>, + pub href: Option<String>, + pub md5: Option<String>, + pub stage: Option<String>, + pub variant: Option<String>, + pub notes: Option<String>, +} + +impl BookPackage { + pub fn identifier(&self) -> String { + match &self.variant { + Some(variant) if !variant.is_empty() => { + format!( + "{}-{}-{}", + self.book, + self.name, + variant.replace(' ', "-").to_lowercase() + ) + } + _ => format!("{}-{}", self.book, self.name), + } + } +} + +#[derive(Debug, Clone)] +pub struct FetchOptions<'a> { + pub base_url: &'a str, + pub book: BookKind, +} + +impl<'a> FetchOptions<'a> { + pub fn new(base_url: &'a str, book: BookKind) -> Self { + Self { base_url, book } + } +} diff --git a/src/lib.rs b/src/lib.rs index 04159f2..e28f156 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,2 +1,12 @@ +pub mod ai; +pub mod db; +pub mod html; +pub mod ingest; +pub mod md5_utils; +pub mod mirrors; pub mod pkgs; +pub mod version_check; +pub mod wget_list; + +#[cfg(feature = "tui")] pub mod tui; diff --git a/src/main.rs b/src/main.rs index 79dd5fe..44a7b0a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,427 @@ -mod tui; +use std::{collections::BTreeSet, env, fs, path::PathBuf}; + +use anyhow::{Context, Result, anyhow}; +use clap::{CommandFactory, Parser, Subcommand}; + +use package_management::{ + db, html, md5_utils, + pkgs::{ + by_name::bi::binutils::cross_toolchain::build_binutils_from_page, + mlfs, + scaffolder::{self, ScaffoldRequest}, + }, + version_check, wget_list, +}; + +#[cfg(feature = "tui")] +use package_management::tui::disk_manager::DiskManager; + +#[derive(Parser)] +#[command(name = "lpkg", version, about = "LPKG – Lightweight Package Manager", long_about = None)] +struct Cli { + /// Command to run. Defaults to launching the TUI (when available). + #[command(subcommand)] + command: Option<Command>, +} + +#[derive(Subcommand)] +enum Command { + /// Run one of the automated workflows. 
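+ /// Available workflows: environment checks, manifest fetching, the Binutils Pass 1 build, package scaffolding, and the MLFS import.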
+ Workflow { + #[command(subcommand)] + workflow: WorkflowCommand, + }, + /// Launch interactive terminal UIs. + #[cfg(feature = "tui")] + #[command(subcommand)] + Tui(TuiCommand), +} + +#[derive(Subcommand)] +enum WorkflowCommand { + /// Fetch <pre> blocks from the given URL and run version checks found inside them. + EnvCheck { + /// URL of the Linux From Scratch page containing ver_check/ver_kernel snippets. + url: String, + }, + /// Download the LFS wget-list and md5sums, optionally writing them to disk. + FetchManifests { + /// Output directory to store wget-list and md5sums files. Uses current dir if omitted. + #[arg(long)] + output: Option<PathBuf>, + }, + /// Parse the Binutils Pass 1 page and build it using the extracted steps. + BuildBinutils { + /// URL of the Binutils Pass 1 instructions to parse. + url: String, + /// Root directory of the LFS workspace (used for $LFS paths). + #[arg(long = "lfs-root")] + lfs_root: PathBuf, + /// Optional explicit cross-compilation target (defaults to $LFS_TGT env or x86_64-lfs-linux-gnu). + #[arg(long)] + target: Option<String>, + }, + /// Scaffold a new package module under `src/pkgs/by_name` with tuned optimizations. + ScaffoldPackage { + /// Logical package name (used for module layout and metadata). + #[arg(long)] + name: String, + /// Upstream version string. + #[arg(long)] + version: String, + /// Optional source archive URL. + #[arg(long)] + source: Option<String>, + /// Optional MD5 checksum of the source archive. + #[arg(long)] + md5: Option<String>, + /// Additional configure arguments (repeat flag). + #[arg(long = "configure-arg", value_name = "ARG")] + configure_arg: Vec<String>, + /// Build commands (repeat flag). + #[arg(long = "build-cmd", value_name = "CMD")] + build_cmd: Vec<String>, + /// Install commands (repeat flag). + #[arg(long = "install-cmd", value_name = "CMD")] + install_cmd: Vec<String>, + /// Declared dependencies (repeat flag). + #[arg(long = "dependency", value_name = "PKG")] + dependency: Vec<String>, + /// Whether to enable LTO (defaults to true). + #[arg(long = "enable-lto", default_value_t = true)] + enable_lto: bool, + /// Whether to enable PGO instrumentation/use (defaults to true). + #[arg(long = "enable-pgo", default_value_t = true)] + enable_pgo: bool, + /// Additional CFLAGS (repeat flag). + #[arg(long = "cflag", value_name = "FLAG")] + cflag: Vec<String>, + /// Additional LDFLAGS (repeat flag). + #[arg(long = "ldflag", value_name = "FLAG")] + ldflag: Vec<String>, + /// Optional profile data file name for PGO replay (enables -fprofile-use). + #[arg(long)] + profdata: Option<String>, + /// Base directory for module generation (defaults to src/pkgs/by_name). + #[arg(long, default_value = "src/pkgs/by_name")] + base: PathBuf, + }, + /// Import all packages from the MLFS catalogue, scaffolding modules and persisting metadata. + ImportMlfs { + /// Perform a dry run without writing files or touching the database. + #[arg(long, default_value_t = false)] + dry_run: bool, + /// Only process the first N records (after deduplication). + #[arg(long)] + limit: Option<usize>, + /// Base directory for module generation (defaults to src/pkgs/by_name). + #[arg(long, default_value = "src/pkgs/by_name")] + base: PathBuf, + /// Overwrite existing modules by deleting and regenerating them. + #[arg(long, default_value_t = false)] + overwrite: bool, + /// Source URL for the MLFS book (defaults to the canonical mirror). 
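+ /// When omitted, mlfs::DEFAULT_MLFS_BASE_URL (https://linuxfromscratch.org/~thomas/multilib-m32) is used.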
+ #[arg(long = "source-url")] + source_url: Option<String>, + }, +} + +#[cfg(feature = "tui")] +#[derive(Subcommand)] +enum TuiCommand { + /// Launch the disk manager UI. + DiskManager, +} + +fn main() -> Result<()> { + let _ = tracing_subscriber::fmt::try_init(); + + let cli = Cli::parse(); + + match cli.command { + Some(Command::Workflow { workflow }) => run_workflow(workflow)?, + #[cfg(feature = "tui")] + Some(Command::Tui(cmd)) => run_tui(cmd)?, + None => { + #[cfg(feature = "tui")] + { + println!( + "No command specified. Launching disk manager TUI. Use `lpkg help` for more options." + ); + DiskManager::run_tui().map_err(|e| anyhow!(e.to_string()))?; + } + + #[cfg(not(feature = "tui"))] + { + Cli::command().print_help()?; + println!(); + } + } + } + + Ok(()) +} + +fn run_workflow(cmd: WorkflowCommand) -> Result<()> { + match cmd { + WorkflowCommand::EnvCheck { url } => { + let pre_blocks = html::fetch_pre_blocks(&url) + .with_context(|| format!("Fetching HTML `<pre>` blocks from {url}"))?; + + let mut ran_any = false; + let mut failures = Vec::new(); + + for (idx, block) in pre_blocks.iter().enumerate() { + if !(block.contains("ver_check") || block.contains("ver_kernel")) { + continue; + } + + ran_any = true; + println!("Running version checks from block #{idx}..."); + if !version_check::run_version_checks_from_block(block) { + failures.push(idx + 1); + } + } + + if !ran_any { + return Err(anyhow!( + "No ver_check or ver_kernel snippets found at {url}." + )); + } + + if !failures.is_empty() { + return Err(anyhow!("Version checks failed in block(s): {:?}", failures)); + } + + println!("All version checks passed 👍"); + } + WorkflowCommand::FetchManifests { output } => { + let wget_list = wget_list::get_wget_list().context("Fetching wget-list")?; + let md5sums = md5_utils::get_md5sums().context("Fetching md5sums")?; + + println!("Fetched wget-list ({} bytes)", wget_list.len()); + println!("Fetched md5sums ({} bytes)", md5sums.len()); + + let target_dir = output.unwrap_or(std::env::current_dir()?); + fs::create_dir_all(&target_dir) + .with_context(|| format!("Creating output directory at {:?}", target_dir))?; + + let wget_path = target_dir.join("wget-list"); + let md5_path = target_dir.join("md5sums"); + + fs::write(&wget_path, wget_list).with_context(|| format!("Writing {wget_path:?}"))?; + fs::write(&md5_path, md5sums).with_context(|| format!("Writing {md5_path:?}"))?; + + println!("Saved artifacts to {:?} and {:?}", wget_path, md5_path); + } + WorkflowCommand::BuildBinutils { + url, + lfs_root, + target, + } => { + let runtime = tokio::runtime::Runtime::new().context("Creating async runtime")?; + runtime + .block_on(build_binutils_from_page(&url, &lfs_root, target)) + .map_err(|e| anyhow!("Building Binutils using instructions from {url}: {e}"))?; + + println!("Binutils workflow completed successfully"); + } + WorkflowCommand::ScaffoldPackage { + name, + version, + source, + md5, + configure_arg, + build_cmd, + install_cmd, + dependency, + enable_lto, + enable_pgo, + cflag, + ldflag, + profdata, + base, + } => { + let base_dir = if base.is_relative() { + env::current_dir() + .context("Resolving scaffold base directory")? 
+ .join(base) + } else { + base + }; + + let request = ScaffoldRequest { + name: name.clone(), + version: version.clone(), + source, + md5, + configure_args: configure_arg, + build_commands: build_cmd, + install_commands: install_cmd, + dependencies: dependency, + enable_lto, + enable_pgo, + cflags: cflag, + ldflags: ldflag, + profdata, + stage: None, + variant: None, + notes: None, + module_override: None, + }; + + let scaffold = scaffolder::scaffold_package(&base_dir, request) + .with_context(|| format!("Scaffolding package {name}"))?; + + let pool = db::establish_pool().context("Setting up package database")?; + db::upsert_package_via_pool(&pool, &scaffold.definition) + .with_context(|| format!("Persisting package metadata for {name}"))?; + + println!("Generated module: {:?}", scaffold.module_path); + println!( + "Remember to stage and commit as `{name}: init at {version}` after reviewing the template" + ); + } + WorkflowCommand::ImportMlfs { + dry_run, + limit, + base, + overwrite, + source_url, + } => { + let base_dir = if base.is_relative() { + env::current_dir() + .context("Resolving MLFS scaffold base directory")? + .join(base) + } else { + base + }; + + let mut records = mlfs::load_or_fetch_catalog(source_url.as_deref()) + .context("Loading MLFS catalogue")?; + records.sort_by(|a, b| a.name.cmp(&b.name).then(a.variant.cmp(&b.variant))); + + let mut seen = BTreeSet::new(); + let mut processed = 0usize; + let mut created = 0usize; + let mut skipped = Vec::new(); + + let pool = if dry_run { + None + } else { + Some(db::establish_pool().context("Setting up package database")?) + }; + + for record in records { + let module_alias = record.module_alias(); + if !seen.insert(module_alias.clone()) { + continue; + } + + if let Some(limit) = limit { + if processed >= limit { + break; + } + } + processed += 1; + + if dry_run { + println!( + "Would scaffold {:<18} {:<12} -> {}", + record.name, record.version, module_alias + ); + continue; + } + + let request = ScaffoldRequest { + name: record.name.clone(), + version: record.version.clone(), + source: None, + md5: None, + configure_args: Vec::new(), + build_commands: Vec::new(), + install_commands: Vec::new(), + dependencies: Vec::new(), + enable_lto: true, + enable_pgo: true, + cflags: Vec::new(), + ldflags: Vec::new(), + profdata: None, + stage: record.stage.clone(), + variant: record.variant.clone(), + notes: record.notes.clone(), + module_override: Some(module_alias.clone()), + }; + + match scaffolder::scaffold_package(&base_dir, request) { + Ok(result) => { + if let Some(pool) = &pool { + db::upsert_package_via_pool(pool, &result.definition).with_context( + || { + format!( + "Persisting MLFS package metadata for {} {}", + record.name, record.version + ) + }, + )?; + } + println!( + "Scaffolded {:<18} {:<12} -> {}", + record.name, record.version, module_alias + ); + created += 1; + } + Err(err) => { + let already_exists = + err.to_string().to_lowercase().contains("already exists"); + if already_exists && !overwrite { + skipped.push(module_alias); + } else { + return Err(err); + } + } + } + } + + if dry_run { + println!( + "Dry run complete. {} package definitions queued.", + processed + ); + } else { + println!( + "MLFS import complete. Created {} modules, skipped {} (already existed).", + created, + skipped.len() + ); + if !skipped.is_empty() { + println!( + "Skipped modules: {}", + skipped + .iter() + .take(10) + .cloned() + .collect::<Vec<_>>() + .join(", ") + ); + if skipped.len() > 10 { + println!("... 
and {} more", skipped.len() - 10); + } + } + } + } + } + + Ok(()) +} + +#[cfg(feature = "tui")] +fn run_tui(cmd: TuiCommand) -> Result<()> { + match cmd { + TuiCommand::DiskManager => { + DiskManager::run_tui().map_err(|e| anyhow!(e.to_string()))?; + } + } -fn main() -> Result<(), Box<dyn std::error::Error>> { - tui::disk_manager::DiskManager::run_tui()?; Ok(()) } diff --git a/src/pkgs/mlfs.rs b/src/pkgs/mlfs.rs new file mode 100644 index 0000000..214da30 --- /dev/null +++ b/src/pkgs/mlfs.rs @@ -0,0 +1,116 @@ +use std::borrow::Cow; + +use anyhow::{Context, Result, anyhow}; +use serde::{Deserialize, Serialize}; + +use crate::ingest::{BookKind, BookPackage, FetchOptions, lfs}; +use crate::pkgs::package::PackageDefinition; + +pub const DEFAULT_MLFS_BASE_URL: &str = "https://linuxfromscratch.org/~thomas/multilib-m32"; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct MlfsPackageRecord { + pub name: String, + pub version: String, + pub chapter: Option<u32>, + pub section: Option<String>, + #[serde(default)] + pub stage: Option<String>, + #[serde(default)] + pub variant: Option<String>, + #[serde(default)] + pub notes: Option<String>, +} + +impl MlfsPackageRecord { + pub fn id(&self) -> String { + let mut id = self.name.replace('+', "plus"); + if let Some(variant) = &self.variant { + id.push('_'); + id.push_str(&variant.replace('-', "_")); + } + id + } + + pub fn module_alias(&self) -> String { + self.id() + .replace('.', "_") + .replace('/', "_") + .replace(' ', "_") + .to_lowercase() + } + + pub fn display_label(&self) -> Cow<'_, str> { + match (&self.section, &self.variant) { + (Some(section), Some(variant)) => Cow::from(format!("{} ({})", section, variant)), + (Some(section), None) => Cow::from(section.as_str()), + (None, Some(variant)) => Cow::from(variant.as_str()), + _ => Cow::from(self.name.as_str()), + } + } + + pub fn to_package_definition(&self) -> PackageDefinition { + let mut pkg = PackageDefinition::new(&self.name, &self.version); + if let Some(stage) = &self.stage { + pkg.optimizations + .cflags + .push(format!("-DLPKG_STAGE={}", stage.to_uppercase())); + } + if let Some(variant) = &self.variant { + pkg.optimizations + .cflags + .push(format!("-DLPKG_VARIANT={}", variant.to_uppercase())); + } + if let Some(notes) = &self.notes { + pkg.optimizations + .cflags + .push(format!("-DLPKG_NOTES={}", notes.replace(' ', "_"))); + } + pkg + } + + fn from_book_package(pkg: BookPackage) -> Option<Self> { + let version = pkg.version?; + Some(Self { + name: pkg.name, + version, + chapter: pkg.chapter, + section: pkg.section, + stage: pkg.stage, + variant: pkg.variant, + notes: pkg.notes, + }) + } +} + +pub fn fetch_catalog(base_url: &str) -> Result<Vec<MlfsPackageRecord>> { + let options = FetchOptions::new(base_url, BookKind::Mlfs); + let packages = lfs::fetch_book(&options)?; + let mut records = packages + .into_iter() + .filter_map(MlfsPackageRecord::from_book_package) + .collect::<Vec<_>>(); + if records.is_empty() { + return Err(anyhow!("No packages parsed from MLFS book at {base_url}.")); + } + records.sort_by(|a, b| a.name.cmp(&b.name).then(a.variant.cmp(&b.variant))); + Ok(records) +} + +pub fn load_cached_catalog() -> Result<Vec<MlfsPackageRecord>> { + let raw = include_str!("../../data/mlfs_ml-12.4-40-multilib.json"); + let records: Vec<MlfsPackageRecord> = + serde_json::from_str(raw).context("parsing cached MLFS package manifest")?; + Ok(records) +} + +pub fn load_or_fetch_catalog(base_url: Option<&str>) -> Result<Vec<MlfsPackageRecord>> { + let base = 
base_url.unwrap_or(DEFAULT_MLFS_BASE_URL); + match fetch_catalog(base) { + Ok(records) => Ok(records), + Err(err) => { + tracing::warn!("mlfs_fetch_error" = %err, "Falling back to cached MLFS package list"); + load_cached_catalog() + } + } +} diff --git a/src/pkgs/mod.rs b/src/pkgs/mod.rs index 9dacb43..90957bc 100644 --- a/src/pkgs/mod.rs +++ b/src/pkgs/mod.rs @@ -1 +1,4 @@ pub mod by_name; +pub mod mlfs; +pub mod package; +pub mod scaffolder; diff --git a/src/pkgs/package.rs b/src/pkgs/package.rs new file mode 100644 index 0000000..2513025 --- /dev/null +++ b/src/pkgs/package.rs @@ -0,0 +1,74 @@ +use serde::{Deserialize, Serialize}; + +/// High-level description of a package managed by LPKG. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PackageDefinition { + pub name: String, + pub version: String, + pub source: Option<String>, + pub md5: Option<String>, + pub configure_args: Vec<String>, + pub build_commands: Vec<String>, + pub install_commands: Vec<String>, + pub dependencies: Vec<String>, + pub optimizations: OptimizationSettings, +} + +impl PackageDefinition { + pub fn new(name: impl Into<String>, version: impl Into<String>) -> Self { + Self { + name: name.into(), + version: version.into(), + source: None, + md5: None, + configure_args: Vec::new(), + build_commands: Vec::new(), + install_commands: Vec::new(), + dependencies: Vec::new(), + optimizations: OptimizationSettings::default(), + } + } +} + +/// Tunable compiler and linker flags applied during package builds. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct OptimizationSettings { + pub enable_lto: bool, + pub enable_pgo: bool, + pub cflags: Vec<String>, + pub ldflags: Vec<String>, + pub profdata: Option<String>, +} + +impl Default for OptimizationSettings { + fn default() -> Self { + Self { + enable_lto: true, + enable_pgo: true, + cflags: vec![ + "-O3".to_string(), + "-flto".to_string(), + "-fprofile-generate".to_string(), + ], + ldflags: vec!["-flto".to_string(), "-fprofile-generate".to_string()], + profdata: None, + } + } +} + +impl OptimizationSettings { + /// Convenience helper for disabling instrumentation once profile data has been gathered. 
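+ /// Swaps the default `-fprofile-generate` flags for `-fprofile-use` and records the supplied profile path, e.g. `OptimizationSettings::for_pgo_replay("path/to/profile-data")`.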
+ pub fn for_pgo_replay(profdata: impl Into<String>) -> Self { + Self { + enable_lto: true, + enable_pgo: true, + cflags: vec![ + "-O3".to_string(), + "-flto".to_string(), + "-fprofile-use".to_string(), + ], + ldflags: vec!["-flto".to_string(), "-fprofile-use".to_string()], + profdata: Some(profdata.into()), + } + } +} diff --git a/src/pkgs/scaffolder.rs b/src/pkgs/scaffolder.rs new file mode 100644 index 0000000..cc67ffc --- /dev/null +++ b/src/pkgs/scaffolder.rs @@ -0,0 +1,293 @@ +use std::fs::{self, OpenOptions}; +use std::io::Write; +use std::path::{Path, PathBuf}; + +use anyhow::{Context, Result, anyhow}; + +use crate::pkgs::package::{OptimizationSettings, PackageDefinition}; + +#[derive(Debug, Clone)] +pub struct ScaffoldRequest { + pub name: String, + pub version: String, + pub source: Option<String>, + pub md5: Option<String>, + pub configure_args: Vec<String>, + pub build_commands: Vec<String>, + pub install_commands: Vec<String>, + pub dependencies: Vec<String>, + pub enable_lto: bool, + pub enable_pgo: bool, + pub cflags: Vec<String>, + pub ldflags: Vec<String>, + pub profdata: Option<String>, + pub stage: Option<String>, + pub variant: Option<String>, + pub notes: Option<String>, + pub module_override: Option<String>, +} + +#[derive(Debug, Clone)] +pub struct ScaffoldResult { + pub module_path: PathBuf, + pub prefix_module: PathBuf, + pub by_name_module: PathBuf, + pub definition: PackageDefinition, +} + +pub fn scaffold_package( + base_dir: impl AsRef<Path>, + request: ScaffoldRequest, +) -> Result<ScaffoldResult> { + let base_dir = base_dir.as_ref(); + if !base_dir.ends_with("by_name") { + return Err(anyhow!("expected base directory ending with 'by_name'")); + } + + let module_source_name = request.module_override.as_deref().unwrap_or(&request.name); + let module_name = sanitize(module_source_name); + let prefix = prefix(&module_name); + + let prefix_dir = base_dir.join(&prefix); + fs::create_dir_all(&prefix_dir) + .with_context(|| format!("creating prefix directory {:?}", prefix_dir))?; + + let by_name_mod = base_dir.join("mod.rs"); + ensure_mod_entry(&by_name_mod, &prefix)?; + + let prefix_mod = prefix_dir.join("mod.rs"); + ensure_mod_entry(&prefix_mod, &module_name)?; + + let package_dir = prefix_dir.join(&module_name); + if package_dir.exists() { + return Err(anyhow!("package module {:?} already exists", package_dir)); + } + fs::create_dir_all(&package_dir) + .with_context(|| format!("creating package directory {:?}", package_dir))?; + + let module_path = package_dir.join("mod.rs"); + let definition = build_definition(&request); + let source = generate_module_source(&request, &definition); + fs::write(&module_path, source) + .with_context(|| format!("writing module source to {:?}", module_path))?; + + Ok(ScaffoldResult { + module_path, + prefix_module: prefix_mod, + by_name_module: by_name_mod, + definition, + }) +} + +fn ensure_mod_entry(path: &Path, module: &str) -> Result<()> { + let entry = format!("pub mod {};", module); + if path.exists() { + let contents = + fs::read_to_string(path).with_context(|| format!("reading module file {:?}", path))?; + if contents.contains(&entry) || contents.contains(&entry.trim()) { + return Ok(()); + } + let mut file = OpenOptions::new() + .append(true) + .open(path) + .with_context(|| format!("opening module file {:?}", path))?; + writeln!(file, "pub mod {};", module) + .with_context(|| format!("appending to module file {:?}", path))?; + } else { + fs::write(path, format!("pub mod {};\n", module)) + .with_context(|| format!("creating 
module file {:?}", path))?; + } + Ok(()) +} + +fn build_definition(request: &ScaffoldRequest) -> PackageDefinition { + let mut pkg = PackageDefinition::new(&request.name, &request.version); + pkg.source = request.source.clone(); + pkg.md5 = request.md5.clone(); + pkg.configure_args = request.configure_args.clone(); + pkg.build_commands = request.build_commands.clone(); + pkg.install_commands = request.install_commands.clone(); + pkg.dependencies = request.dependencies.clone(); + + let mut cflags = if request.cflags.is_empty() { + default_cflags(request) + } else { + request.cflags.clone() + }; + let mut ldflags = if request.ldflags.is_empty() { + default_ldflags(request) + } else { + request.ldflags.clone() + }; + dedup(&mut cflags); + dedup(&mut ldflags); + + let profdata = request.profdata.clone(); + let profdata_clone = profdata.clone(); + pkg.optimizations = match profdata_clone { + Some(path) => OptimizationSettings::for_pgo_replay(path), + None => OptimizationSettings::default(), + }; + pkg.optimizations.enable_lto = request.enable_lto; + pkg.optimizations.enable_pgo = request.enable_pgo; + pkg.optimizations.cflags = cflags; + pkg.optimizations.ldflags = ldflags; + pkg.optimizations.profdata = profdata; + + pkg +} + +fn default_cflags(request: &ScaffoldRequest) -> Vec<String> { + let mut flags = vec!["-O3".to_string(), "-flto".to_string()]; + if request.enable_pgo { + if request.profdata.is_some() { + flags.push("-fprofile-use".to_string()); + } else { + flags.push("-fprofile-generate".to_string()); + } + } + flags +} + +fn default_ldflags(request: &ScaffoldRequest) -> Vec<String> { + let mut flags = vec!["-flto".to_string()]; + if request.enable_pgo { + if request.profdata.is_some() { + flags.push("-fprofile-use".to_string()); + } else { + flags.push("-fprofile-generate".to_string()); + } + } + flags +} + +fn dedup(values: &mut Vec<String>) { + let mut seen = std::collections::BTreeSet::new(); + values.retain(|value| seen.insert(value.clone())); +} + +fn generate_module_source(request: &ScaffoldRequest, definition: &PackageDefinition) -> String { + let mut metadata = Vec::new(); + if let Some(stage) = &request.stage { + metadata.push(format!("stage: {}", stage)); + } + if let Some(variant) = &request.variant { + metadata.push(format!("variant: {}", variant)); + } + if let Some(notes) = &request.notes { + metadata.push(format!("notes: {}", notes)); + } + let metadata = if metadata.is_empty() { + String::new() + } else { + format!("// MLFS metadata: {}\n\n", metadata.join(", ")) + }; + let configure_args = format_vec(&definition.configure_args); + let build_commands = format_vec(&definition.build_commands); + let install_commands = format_vec(&definition.install_commands); + let dependencies = format_vec(&definition.dependencies); + let cflags = format_vec(&definition.optimizations.cflags); + let ldflags = format_vec(&definition.optimizations.ldflags); + let source = format_option(&definition.source); + let md5 = format_option(&definition.md5); + let profdata = format_option(&definition.optimizations.profdata); + + format!( + "{metadata}use crate::pkgs::package::{{OptimizationSettings, PackageDefinition}};\n\n\ + pub fn definition() -> PackageDefinition {{\n\ + let mut pkg = PackageDefinition::new(\"{name}\", \"{version}\");\n\ + pkg.source = {source};\n\ + pkg.md5 = {md5};\n\ + pkg.configure_args = {configure_args};\n\ + pkg.build_commands = {build_commands};\n\ + pkg.install_commands = {install_commands};\n\ + pkg.dependencies = {dependencies};\n\ + let profdata = {profdata};\n\ + 
let profdata_clone = profdata.clone();\n\ + pkg.optimizations = match profdata_clone {{\n\ + Some(path) => OptimizationSettings::for_pgo_replay(path),\n\ + None => OptimizationSettings::default(),\n\ + }};\n\ + pkg.optimizations.enable_lto = {enable_lto};\n\ + pkg.optimizations.enable_pgo = {enable_pgo};\n\ + pkg.optimizations.cflags = {cflags};\n\ + pkg.optimizations.ldflags = {ldflags};\n\ + pkg.optimizations.profdata = profdata;\n\ + pkg\n\ + }}\n", + metadata = metadata, + name = request.name, + version = request.version, + source = source, + md5 = md5, + configure_args = configure_args, + build_commands = build_commands, + install_commands = install_commands, + dependencies = dependencies, + profdata = profdata, + enable_lto = request.enable_lto, + enable_pgo = request.enable_pgo, + cflags = cflags, + ldflags = ldflags, + ) +} + +fn format_vec(values: &[String]) -> String { + if values.is_empty() { + "Vec::new()".to_string() + } else { + let items: Vec<String> = values + .iter() + .map(|v| format!("\"{}\".to_string()", escape(v))) + .collect(); + format!("vec![{}]", items.join(", ")) + } +} + +fn format_option(value: &Option<String>) -> String { + match value { + Some(v) => format!("Some(\"{}\".to_string())", escape(v)), + None => "None".to_string(), + } +} + +fn sanitize(name: &str) -> String { + let mut out = String::new(); + for ch in name.chars() { + if ch.is_ascii_alphanumeric() { + out.push(ch.to_ascii_lowercase()); + } else if ch == '_' || ch == '+' { + out.push('_'); + } else if ch == '-' { + out.push('_'); + } else { + out.push('_'); + } + } + if out.is_empty() { + out.push_str("pkg"); + } + if out + .chars() + .next() + .map(|c| c.is_ascii_digit()) + .unwrap_or(false) + { + out.insert(0, 'p'); + } + out +} + +fn prefix(module: &str) -> String { + let mut chars = module.chars(); + let first = chars.next().unwrap_or('p'); + let second = chars.next().unwrap_or('k'); + let mut s = String::new(); + s.push(first); + s.push(second); + s +} + +fn escape(input: &str) -> String { + input.replace('\\', "\\\\").replace('"', "\\\"") +} diff --git a/src/tui/disk_manager.rs b/src/tui/disk_manager.rs index 465c2d1..0a08278 100644 --- a/src/tui/disk_manager.rs +++ b/src/tui/disk_manager.rs @@ -1,7 +1,7 @@ // src/tui/disk_manager.rs use std::{ fs::{File, read_dir}, - io::{self, Seek, SeekFrom, Write}, + io::{self, Seek, SeekFrom}, path::PathBuf, }; @@ -186,12 +186,12 @@ impl DiskManager { }, }; - // Create list of lines to display: + // Create list of lines to display using public GPT API: let mut lines: Vec<String> = Vec::new(); lines.push(format!("Partitions on {}:", disk.display())); - for (i, entry_opt) in gpt.partitions.iter().enumerate() { - if let Some(entry) = entry_opt { - let name = entry.partition_name.to_string(); + for (i, entry) in gpt.iter() { + if entry.is_used() { + let name = entry.partition_name.as_str(); lines.push(format!( "{}: {} -> {} (type: {})", i, @@ -388,9 +388,9 @@ impl DiskManager { let sectors = (size_mb as u128 * 1024 * 1024 / 512) as u64; // choose starting LBA: find max ending_lba among existing partitions; align to 2048 let last_end = gpt - .partitions .iter() - .filter_map(|p| p.as_ref().map(|e| e.ending_lba)) + .filter(|(_, e)| e.is_used()) + .map(|(_, e)| e.ending_lba) .max() .unwrap_or(2048); let start = ((last_end + 2048) / 2048) * 2048 + 1; @@ -410,15 +410,15 @@ impl DiskManager { }; new_entry.partition_type_guid = type_guid; - // find first empty partition slot - let idx_opt = gpt.partitions.iter().position(|p| p.is_none()); + // find first empty 
partition slot (indexing is 1-based for gptman::GPT) + let idx_opt = gpt.iter().find(|(_, e)| e.is_unused()).map(|(i, _)| i); let idx = match idx_opt { Some(i) => i, None => return Err("No free GPT partition entries (maxed out)".into()), }; // assign and write - gpt.partitions[idx] = Some(new_entry); + gpt[idx] = new_entry; // Seek to start (important) file.seek(SeekFrom::Start(0))?; diff --git a/src/tui/main_menu.rs b/src/tui/main_menu.rs index 978d040..31051f1 100644 --- a/src/tui/main_menu.rs +++ b/src/tui/main_menu.rs @@ -1,7 +1,6 @@ use crate::tui::disk_manager::DiskManager; use crossterm::event::{self, Event, KeyCode}; use std::error::Error; -use std::io::Stdout; use tui::{ Terminal, backend::CrosstermBackend, @@ -11,7 +10,7 @@ use tui::{ }; pub fn show_main_menu() -> Result<(), Box<dyn Error>> { - let mut stdout = std::io::stdout(); + let stdout = std::io::stdout(); let backend = CrosstermBackend::new(stdout); let mut terminal = Terminal::new(backend)?; @@ -37,7 +36,7 @@ pub fn show_main_menu() -> Result<(), Box<dyn Error>> { if event::poll(std::time::Duration::from_millis(100))? { if let Event::Key(key) = event::read()? { match key.code { - KeyCode::Char('1') => DiskManager::show_disk_manager(&mut terminal)?, + KeyCode::Char('1') => DiskManager::run_tui()?, KeyCode::Char('0') => break, _ => {} } diff --git a/src/tui/settings.rs b/src/tui/settings.rs index d258dd8..8badd83 100644 --- a/src/tui/settings.rs +++ b/src/tui/settings.rs @@ -18,9 +18,9 @@ impl Theme { } impl Settings { - #[instrument(skip(terminal))] + #[instrument(skip(_terminal))] pub fn show_settings( - terminal: &mut Terminal<CrosstermBackend<Stdout>>, + _terminal: &mut Terminal<CrosstermBackend<Stdout>>, ) -> Result<(), Box<dyn std::error::Error>> { // Render settings UI here Ok(())