Initial commit

This commit is contained in:
semubico 2025-09-08 14:42:23 +03:00
parent f24e05b7f6
commit 4040b3fa4a
3 changed files with 312 additions and 2 deletions

View file

@ -1,8 +1,8 @@
[package]
name = "template"
name = "tgphpgen"
version = "0.1.0"
edition = "2021"
license = "GPL-2.0"
license = "MIT"
[profile.release]
strip = true
@ -11,4 +11,5 @@ lto = "on"
codegen-units = 1
[dependencies]
regex = "1.11.2"

85
README.md Normal file
View file

@ -0,0 +1,85 @@
# rust-telegram-php5-generator
A small Rust CLI utility that parses the Telegram Bot API HTML documentation (saved from https://core.telegram.org/bots/api) and generates PHP methods for every API method, preserving argument order and copying documentation. Designed for projects stuck on PHP 5.6 that cannot use modern Telegram bindings.
---
## Features
- Parses a saved HTML page of the Telegram Bot API documentation.
- Generates PHP method stubs for every API method found.
- Preserves documentation (method description, parameter descriptions) and original argument order.
- Outputs generated PHP code to stdout; errors are written to stderr.
- Accepts optional prefix and suffix files to inject custom code around the generated method list.
---
## Usage
Build and run the utility (example assumes the compiled binary is named `tgphpgen`):
```
tgphpgen /path/to/bots_api.html [/path/to/prefix.php] [/path/to/suffix.php]
```
- First argument (required): **path to the saved HTML documentation file** (the page from core.telegram.org/bots/api).
- Second argument (optional): **path to a prefix file** whose contents will be prepended before the generated methods.
- Third argument (optional): **path to a suffix file** whose contents will be appended after the generated methods.
Output:
- Generated PHP code is written to **stdout**.
- Parsing or file errors are written to **stderr** and the program exits with a non-zero exit code.
---
## Generated PHP style
- Each Telegram API method becomes a PHP method inside a class.
- Argument names and order match the documentation.
- Required arguments are checked for emptiness. When an empty required argument is detected, an error is returned as `[1 => ...]`, which allows easy destructuring assignment in the Go-inspired form `list($res, $err) = ...`.
- Parameter types and descriptions are included as PHPDoc above each function when available.
- Optional parameters are given default values to maintain compatibility with PHP 5.6.
- All methods internally invoke `$this->trySend($apiMethodName, $apiMethodArgs)`; its implementation must be provided by the user in the prefix or suffix file.
---
## Implementation notes
- The parser expects the HTML structure from core.telegram.org/bots/api as saved by a browser. Minor variations in formatting may require adjusting the parser.
- The tool focuses on robustness for PHP 5.6 compatibility:
- No scalar type hints in generated PHP function signatures; only parameters documented as JSON-serialized receive an `array` hint.
- Optional params default to `null`.
- Uses associative arrays and simple return values (arrays) so integration with existing PHP 5.6 code is straightforward. Errors are returned under key `1` so they can be extracted easily with the Go-like syntax `list($result, $error) = ...`.
- The utility prints helpful errors to stderr for:
- Missing input file
- Invalid HTML structure (unable to locate API method sections)
- File read errors for prefix/suffix
---
## Examples
Generate methods only:
```
tgphpgen bots_api.html > telegram_methods.php
```
Generate with a wrapper class (prefix contains class header, suffix contains closing brace):
```
tgphpgen bots_api.html prefix.php suffix.php > TelegramApi.php
```
---
## Contributing
- Fixes for parsing edge cases welcome — the HTML layout on core.telegram.org may change; tests and example saved HTML files help keep the parser resilient.
- Please include a saved copy of the documentation page that reproduces any parsing bug you report.
---
## License
MIT License.

224
src/main.rs Normal file
View file

@ -0,0 +1,224 @@
/// One API method scraped from the documentation page, as consumed by the
/// `build_*` generator functions.
#[derive(Default, Debug, Clone)]
struct Method {
// Heading text of the method: the part of its `<h4>` line after `</a>`, with tags stripped.
name: String,
// Text of the first `<p>` line that follows the heading, with tags stripped.
description: String,
// Not written by any code visible in this file; stays at its default (empty).
notes: String,
// One entry per `<tr` line seen after the description was captured.
// Entries whose name is empty are skipped by the generator functions.
params: Vec<Param>
}
/// One row of a method's parameter table; the first four `<td>` cells of the
/// row are stored, in order, in `name`, `kind`, `required` and `description`.
#[derive(Default, Debug, Clone)]
struct Param {
// First cell of the row; used as the PHP variable name (without the `$`).
name: String,
// Second cell of the row; turned into a PHPDoc type by `build_param_kind`.
kind: String,
// Third cell of the row; the generators treat the value "Yes" as "required".
required: String,
// Fourth cell of the row; the generators look for "JSON-serialized" in it.
description: String,
// Not written by any code visible in this file; stays at its default (empty).
returns: String
}
/// State of the line-by-line parser in `main`: records what has already been
/// read for the section currently being processed.
#[derive(Default, Clone)]
enum AlreadyParsed {
// A heading starting with a lowercase letter was read; only the method name is known.
MethodName(Method),
// The description paragraph was read; `<tr`/`<td` lines are now collected as parameters.
MethodDescription(Method),
// Never constructed by the code visible in this file; only matched when flushing a method.
MethodParams(Method),
// A heading starting with an uppercase letter was read; lines are ignored until the next heading.
Skipping,
// Initial state: no heading has been seen yet.
#[default]
Nothing
}
/// Returns the part of `line` located between the first occurrence of `from`
/// and the first occurrence of `to` that follows it.
///
/// * If `from` does not occur in `line`, an empty string is returned.
/// * If `to` does not occur after `from`, everything after `from` is returned.
///
/// The result is a sub-slice of `line`; nothing is allocated.
fn take_between<'a>(line: &'a str, from: &str, to: &str) -> &'a str {
    match line.split_once(from) {
        // `split_once` keeps the whole remainder, so a repeated `from` marker
        // before `to` no longer truncates the extracted text.
        Some((_, rest)) => rest.split_once(to).map_or(rest, |(inner, _)| inner),
        None => "",
    }
}
fn main() {
let path = std::env::args().nth(1).unwrap();
let data = std::fs::read_to_string(path).unwrap();
let data = take_between(&data, r#"<div id="dev_page_content">"#, r#"<div class="footer_wrap">"#);
let data = data.lines().enumerate();
let mut methods: Vec<Method> = Vec::new();
let mut state = AlreadyParsed::default();
let cleanser = regex::RegexBuilder::new("<[^>]*>").build().unwrap();
let mut param_counter = 0;
for (ix, line_) in data {
match state {
// Found method
_ if line_.starts_with("<h4>")
&& line_.split("</a>").nth(1).unwrap_or("").chars().nth(0).unwrap_or('A').is_lowercase() => {
match state {
AlreadyParsed::MethodDescription(ref method)
| AlreadyParsed::MethodName(ref method)
| AlreadyParsed::MethodParams(ref method) => methods.push(method.clone()),
_ => {}
}
let method = Method {
name: cleanser.replace_all(take_between(line_, "</a>", "</h4>"), "").to_string(),
..Default::default()
};
state = AlreadyParsed::MethodName(method);
},
_ if line_.starts_with("<h4>")
&& line_.split("</a>").nth(1).unwrap_or("").chars().nth(0).unwrap_or('a').is_uppercase() => {
match state {
AlreadyParsed::MethodDescription(ref method)
| AlreadyParsed::MethodName(ref method)
| AlreadyParsed::MethodParams(ref method) => methods.push(method.clone()),
_ => {}
}
state = AlreadyParsed::Skipping;
continue
},
AlreadyParsed::MethodName(ref mut method) if line_.starts_with("<p>") => {
method.description = cleanser.replace_all(take_between(line_, "<p>", "</p>"), "").to_string();
state = AlreadyParsed::MethodDescription(method.clone());
},
AlreadyParsed::MethodDescription(ref mut method) if line_.starts_with("<tr") => {
param_counter = 0;
method.params.push(Param::default());
state = AlreadyParsed::MethodDescription(method.clone());
},
AlreadyParsed::MethodDescription(ref mut method) if line_.starts_with("<td") => {
match param_counter {
0 => method.params.last_mut().unwrap().name = cleanser.replace_all(take_between(line_, "<td>", "</td>"), "").to_string(),
1 => method.params.last_mut().unwrap().kind = cleanser.replace_all(take_between(line_, "<td>", "</td>"), "").to_string(),
2 => method.params.last_mut().unwrap().required = cleanser.replace_all(take_between(line_, "<td>", "</td>"), "").to_string(),
3 => method.params.last_mut().unwrap().description = cleanser.replace_all(take_between(line_, "<td>", "</td>"), "").to_string(),
_ => unreachable!()
}
param_counter += 1;
},
_ => {}
}
}
for method in methods.iter_mut() {
method.params = method.params.clone().into_iter().filter(|param| match param {
Param { name, kind, required, .. }
if name == "Name" && kind == "Type" && required == "Description" => false,
_ => true
}).collect();
}
let output = build_methods(&methods);
let prefix = std::env::args().nth(2).and_then(|p| Some(std::fs::read_to_string(p).unwrap())).unwrap_or_default();
let suffix = std::env::args().nth(3).and_then(|p| Some(std::fs::read_to_string(p).unwrap())).unwrap_or_default();
println!("<?php\n{}\n{}\n {}", prefix, output, suffix);
}
/// Renders every method as PHP source (doc comment, signature, body) and
/// concatenates the results in the order of `methods`.
fn build_methods(methods: &[Method]) -> String
{
    let mut rendered = String::new();
    for method in methods {
        rendered.push_str(&build_method_description(method));
        rendered.push_str(&build_method_signature(method));
        rendered.push_str(&build_method_body(method));
    }
    rendered
}
/// Renders the braces-delimited PHP body of `method`: required-parameter
/// checks, then JSON conversions, then the `trySend` call.
fn build_method_body(method: &Method) -> String
{
    let checks = buiild_method_params_check(method);
    let converters = build_method_params_converters(method);
    let call = build_connect_string(method);
    let block = format!("{{{}{}{}\n\t}}", checks, converters, call);
    format!("\n\t{}", block)
}
/// Renders the PHP `return $this->trySend(...)` statement for `method`,
/// passing every named parameter as a `'name' => $name` array entry.
fn build_connect_string(method: &Method) -> String
{
    let mut entries: Vec<String> = Vec::new();
    for param in &method.params {
        // Rows without a name carry no usable parameter.
        if param.name.is_empty() {
            continue;
        }
        entries.push(format!("'{}' => ${}", param.name, param.name));
    }
    let array_literal = format!("[{}]", entries.join(", "));
    format!("\n\t\treturn $this->trySend(\"{}\", {});", method.name, array_literal)
}
/// Renders one PHP guard per required parameter of `method`; each guard
/// returns `[1 => "Missing required parameter <name>"]` when the argument is
/// empty. A parameter is required when its `required` cell, trimmed, is "Yes".
fn buiild_method_params_check(method: &Method) -> String
{
    // NOTE(review): PHP `empty()` also treats 0, "0" and false as empty, so a
    // required argument with such a value would be rejected - confirm this is intended.
    let mut guards = String::new();
    for param in &method.params {
        let is_required = param.required.trim() == "Yes";
        if param.name.is_empty() || !is_required {
            continue;
        }
        guards.push_str(&format!("\n\t\tif (empty(${})) return [1 => \"Missing required parameter {}\"]; ", param.name, param.name));
    }
    guards
}
/// Renders one PHP statement per named parameter whose description contains
/// "JSON-serialized"; the statement replaces a truthy argument with its
/// `json_encode` result.
fn build_method_params_converters(method: &Method) -> String
{
    // NOTE(review): `JSON_NORMAL` is not one of PHP's predefined JSON constants;
    // presumably the user-supplied prefix file defines it - confirm.
    let mut statements = String::new();
    for param in &method.params {
        if param.name.is_empty() {
            continue;
        }
        if !param.description.contains("JSON-serialized") {
            continue;
        }
        statements.push_str(&format!("\n\t\tif (${}) ${} = json_encode(${}, JSON_NORMAL);", param.name, param.name, param.name));
    }
    statements
}
/// Renders the `public function name(...)` line of `method`.
///
/// Parameters keep their table order. Parameters described as
/// "JSON-serialized" get an `array ` hint, and every parameter whose
/// `required` cell is not "Yes" gets a ` = null` default.
fn build_method_signature(method: &Method) -> String
{
    let mut declarations: Vec<String> = Vec::new();
    for param in &method.params {
        // Rows without a name carry no usable parameter.
        if param.name.is_empty() {
            continue;
        }
        let maybe_type = match param.description.contains("JSON-serialized") {
            true => "array ",
            false => "",
        };
        let default_value = match param.required.trim() {
            "Yes" => "",
            _ => " = null",
        };
        declarations.push(format!("{}${}{}", maybe_type, param.name, default_value));
    }
    let params_def = declarations.join(", ");
    format!("\n\tpublic function {}({})", method.name, params_def)
}
/// Renders the PHP doc comment placed above `method`: its name, its
/// description and one `@param` line per named parameter.
fn build_method_description(method: &Method) -> String
{
    let mut params_doc = String::new();
    for param in method.params.iter() {
        // Rows without a name carry no usable parameter.
        if param.name.is_empty() {
            continue;
        }
        params_doc.push_str(&build_param_description(param));
    }
    format!("\n\n\n\n\n\t/** {}\n\t* {} {}\n\t*/", method.name, method.description, params_doc)
}
/// Renders the `@param` doc line of `param`: type, name, requirement in
/// brackets, then the description. A `required` cell equal to "Yes" is shown
/// as "Required"; any other value is shown unchanged.
fn build_param_description(param: &Param) -> String
{
    let requirement = match param.required.as_str() {
        "Yes" => "Required",
        other => other,
    };
    let kind = build_param_kind(param);
    format!("\n\t* @param {} {} [{}] {}", kind, param.name, requirement, param.description)
}
/// Converts the documentation type of `param` (its `kind` cell) into a
/// PHPDoc-style type string. The input is lowercased first.
///
/// * `Integer` / `Boolean` become `int` / `bool`; other names are kept.
/// * `X of Y` becomes `x<y>`, applied recursively so nested collections such
///   as `Array of Array of T` keep their element type.
/// * Alternatives separated by `or`, `,` or `and` are joined with `|`, however
///   many there are.
/// * For any other multi-word type only the first word is used.
fn build_param_kind(param: &Param) -> String
{
    // Maps a single documentation type name onto its PHP spelling.
    fn totype(t: &str) -> String {
        match t {
            "integer" => "int".to_string(),
            "boolean" => "bool".to_string(),
            _ => t.to_string()
        }
    }

    // Renders an already-lowercased type expression.
    fn render(kind: &str) -> String {
        let kind = kind.trim();
        let mut words = kind.splitn(3, ' ');
        if let (Some(collection), Some("of"), Some(element)) = (words.next(), words.next(), words.next()) {
            return format!("{}<{}>", totype(collection), render(element));
        }
        // Lists such as "a, b and c" are unions too; normalise every
        // separator to " or " before splitting.
        let alternatives = kind.replace(", ", " or ").replace(" and ", " or ");
        alternatives
            .split(" or ")
            .map(|alternative| totype(alternative.trim().split(' ').next().unwrap_or("")))
            .collect::<Vec<String>>()
            .join("|")
    }

    render(&param.kind.to_lowercase())
}