The only use of find_scripts is in browser/scripts.c but the
definition in builtin causes linking problems requiring a stub in
python.c. Move the function to allow the stub to be removed.
Rewrite the directory iteration to use openat so that large character
arrays aren't needed. The arrays are warned about potential buffer
overflows by GCC now that all the code exists in a single C file.
Signed-off-by: Ian Rogers <irogers@google.com>
---
tools/perf/builtin-script.c | 138 ------------------------
tools/perf/builtin.h | 6 --
tools/perf/ui/browsers/scripts.c | 177 ++++++++++++++++++++++++++++++-
tools/perf/util/path.c | 10 ++
tools/perf/util/path.h | 1 +
tools/perf/util/python.c | 6 --
6 files changed, 186 insertions(+), 152 deletions(-)
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 5d5a1a06d8c6..e9ec74056f71 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -3521,144 +3521,6 @@ static void free_dlarg(void)
free(dlargv);
}
-/*
- * Some scripts specify the required events in their "xxx-record" file,
- * this function will check if the events in perf.data match those
- * mentioned in the "xxx-record".
- *
- * Fixme: All existing "xxx-record" are all in good formats "-e event ",
- * which is covered well now. And new parsing code should be added to
- * cover the future complex formats like event groups etc.
- */
-static int check_ev_match(char *dir_name, char *scriptname,
- struct perf_session *session)
-{
- char filename[MAXPATHLEN], evname[128];
- char line[BUFSIZ], *p;
- struct evsel *pos;
- int match, len;
- FILE *fp;
-
- scnprintf(filename, MAXPATHLEN, "%s/bin/%s-record", dir_name, scriptname);
-
- fp = fopen(filename, "r");
- if (!fp)
- return -1;
-
- while (fgets(line, sizeof(line), fp)) {
- p = skip_spaces(line);
- if (*p == '#')
- continue;
-
- while (strlen(p)) {
- p = strstr(p, "-e");
- if (!p)
- break;
-
- p += 2;
- p = skip_spaces(p);
- len = strcspn(p, " \t");
- if (!len)
- break;
-
- snprintf(evname, len + 1, "%s", p);
-
- match = 0;
- evlist__for_each_entry(session->evlist, pos) {
- if (evsel__name_is(pos, evname)) {
- match = 1;
- break;
- }
- }
-
- if (!match) {
- fclose(fp);
- return -1;
- }
- }
- }
-
- fclose(fp);
- return 0;
-}
-
-/*
- * Return -1 if none is found, otherwise the actual scripts number.
- *
- * Currently the only user of this function is the script browser, which
- * will list all statically runnable scripts, select one, execute it and
- * show the output in a perf browser.
- */
-int find_scripts(char **scripts_array, char **scripts_path_array, int num,
- int pathlen)
-{
- struct dirent *script_dirent, *lang_dirent;
- char scripts_path[MAXPATHLEN], lang_path[MAXPATHLEN];
- DIR *scripts_dir, *lang_dir;
- struct perf_session *session;
- struct perf_data data = {
- .path = input_name,
- .mode = PERF_DATA_MODE_READ,
- };
- char *temp;
- int i = 0;
-
- session = perf_session__new(&data, NULL);
- if (IS_ERR(session))
- return PTR_ERR(session);
-
- snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path());
-
- scripts_dir = opendir(scripts_path);
- if (!scripts_dir) {
- perf_session__delete(session);
- return -1;
- }
-
- for_each_lang(scripts_path, scripts_dir, lang_dirent) {
- scnprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path,
- lang_dirent->d_name);
-#ifndef HAVE_LIBPERL_SUPPORT
- if (strstr(lang_path, "perl"))
- continue;
-#endif
-#ifndef HAVE_LIBPYTHON_SUPPORT
- if (strstr(lang_path, "python"))
- continue;
-#endif
-
- lang_dir = opendir(lang_path);
- if (!lang_dir)
- continue;
-
- for_each_script(lang_path, lang_dir, script_dirent) {
- /* Skip those real time scripts: xxxtop.p[yl] */
- if (strstr(script_dirent->d_name, "top."))
- continue;
- if (i >= num)
- break;
- snprintf(scripts_path_array[i], pathlen, "%s/%s",
- lang_path,
- script_dirent->d_name);
- temp = strchr(script_dirent->d_name, '.');
- snprintf(scripts_array[i],
- (temp - script_dirent->d_name) + 1,
- "%s", script_dirent->d_name);
-
- if (check_ev_match(lang_path,
- scripts_array[i], session))
- continue;
-
- i++;
- }
- closedir(lang_dir);
- }
-
- closedir(scripts_dir);
- perf_session__delete(session);
- return i;
-}
-
static char *get_script_path(const char *script_root, const char *suffix)
{
struct dirent *script_dirent, *lang_dirent;
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 94f4b3769bf7..a07e93c53848 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -2,10 +2,6 @@
#ifndef BUILTIN_H
#define BUILTIN_H
-#include <stddef.h>
-#include <linux/compiler.h>
-#include <tools/config.h>
-
struct feature_status {
const char *name;
const char *macro;
@@ -56,6 +52,4 @@ int cmd_ftrace(int argc, const char **argv);
int cmd_daemon(int argc, const char **argv);
int cmd_kwork(int argc, const char **argv);
-int find_scripts(char **scripts_array, char **scripts_path_array, int num,
- int pathlen);
#endif
diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c
index e437d7889de6..2d04ece833aa 100644
--- a/tools/perf/ui/browsers/scripts.c
+++ b/tools/perf/ui/browsers/scripts.c
@@ -1,16 +1,18 @@
// SPDX-License-Identifier: GPL-2.0
-#include "../../builtin.h"
-#include "../../perf.h"
#include "../../util/util.h" // perf_exe()
#include "../util.h"
+#include "../../util/evlist.h"
#include "../../util/hist.h"
#include "../../util/debug.h"
+#include "../../util/session.h"
#include "../../util/symbol.h"
#include "../browser.h"
#include "../libslang.h"
#include "config.h"
+#include <linux/err.h>
#include <linux/string.h>
#include <linux/zalloc.h>
+#include <subcmd/exec-cmd.h>
#include <stdlib.h>
#define SCRIPT_NAMELEN 128
@@ -77,6 +79,177 @@ static int scripts_config(const char *var, const char *value, void *data)
return 0;
}
+/*
+ * Some scripts specify the required events in their "xxx-record" file,
+ * this function will check if the events in perf.data match those
+ * mentioned in the "xxx-record".
+ *
+ * Fixme: All existing "xxx-record" are all in good formats "-e event ",
+ * which is covered well now. And new parsing code should be added to
+ * cover the future complex formats like event groups etc.
+ */
+static int check_ev_match(int dir_fd, const char *scriptname, struct perf_session *session)
+{
+ char line[BUFSIZ];
+ FILE *fp;
+
+ {
+ char filename[FILENAME_MAX + 5];
+ int fd;
+
+ scnprintf(filename, sizeof(filename), "bin/%s-record", scriptname);
+ fd = openat(dir_fd, filename, O_RDONLY);
+ if (fd == -1)
+ return -1;
+ fp = fdopen(fd, "r");
+ if (!fp)
+ return -1;
+ }
+
+ while (fgets(line, sizeof(line), fp)) {
+ char *p = skip_spaces(line);
+
+ if (*p == '#')
+ continue;
+
+ while (strlen(p)) {
+ int match, len;
+ struct evsel *pos;
+ char evname[128];
+
+ p = strstr(p, "-e");
+ if (!p)
+ break;
+
+ p += 2;
+ p = skip_spaces(p);
+ len = strcspn(p, " \t");
+ if (!len)
+ break;
+
+ snprintf(evname, len + 1, "%s", p);
+
+ match = 0;
+ evlist__for_each_entry(session->evlist, pos) {
+ if (evsel__name_is(pos, evname)) {
+ match = 1;
+ break;
+ }
+ }
+
+ if (!match) {
+ fclose(fp);
+ return -1;
+ }
+ }
+ }
+
+ fclose(fp);
+ return 0;
+}
+
+/*
+ * Return -1 if none is found, otherwise the actual scripts number.
+ *
+ * Currently the only user of this function is the script browser, which
+ * will list all statically runnable scripts, select one, execute it and
+ * show the output in a perf browser.
+ */
+static int find_scripts(char **scripts_array, char **scripts_path_array, int num,
+ int pathlen)
+{
+ struct dirent *script_dirent, *lang_dirent;
+ int scripts_dir_fd, lang_dir_fd;
+ DIR *scripts_dir, *lang_dir;
+ struct perf_session *session;
+ struct perf_data data = {
+ .path = input_name,
+ .mode = PERF_DATA_MODE_READ,
+ };
+ char *temp;
+ int i = 0;
+ const char *exec_path = get_argv_exec_path();
+
+ session = perf_session__new(&data, NULL);
+ if (IS_ERR(session))
+ return PTR_ERR(session);
+
+ {
+ char scripts_path[PATH_MAX];
+
+ snprintf(scripts_path, sizeof(scripts_path), "%s/scripts", exec_path);
+ scripts_dir_fd = open(scripts_path, O_DIRECTORY);
+ pr_err("Failed to open directory '%s'", scripts_path);
+ if (scripts_dir_fd == -1) {
+ perf_session__delete(session);
+ return -1;
+ }
+ }
+ scripts_dir = fdopendir(scripts_dir_fd);
+ if (!scripts_dir) {
+ close(scripts_dir_fd);
+ perf_session__delete(session);
+ return -1;
+ }
+
+ while ((lang_dirent = readdir(scripts_dir)) != NULL) {
+ if (lang_dirent->d_type != DT_DIR &&
+ (lang_dirent->d_type == DT_UNKNOWN &&
+ !is_directory_at(scripts_dir_fd, lang_dirent->d_name)))
+ continue;
+ if (!strcmp(lang_dirent->d_name, ".") || !strcmp(lang_dirent->d_name, ".."))
+ continue;
+
+#ifndef HAVE_LIBPERL_SUPPORT
+ if (strstr(lang_dirent->d_name, "perl"))
+ continue;
+#endif
+#ifndef HAVE_LIBPYTHON_SUPPORT
+ if (strstr(lang_dirent->d_name, "python"))
+ continue;
+#endif
+
+ lang_dir_fd = openat(scripts_dir_fd, lang_dirent->d_name, O_DIRECTORY);
+ if (lang_dir_fd == -1)
+ continue;
+ lang_dir = fdopendir(lang_dir_fd);
+ if (!lang_dir) {
+ close(lang_dir_fd);
+ continue;
+ }
+ while ((script_dirent = readdir(lang_dir)) != NULL) {
+ if (script_dirent->d_type == DT_DIR)
+ continue;
+ if (script_dirent->d_type == DT_UNKNOWN &&
+ is_directory_at(lang_dir_fd, script_dirent->d_name))
+ continue;
+ /* Skip those real time scripts: xxxtop.p[yl] */
+ if (strstr(script_dirent->d_name, "top."))
+ continue;
+ if (i >= num)
+ break;
+ scnprintf(scripts_path_array[i], pathlen, "%s/scripts/%s/%s",
+ exec_path,
+ lang_dirent->d_name,
+ script_dirent->d_name);
+ temp = strchr(script_dirent->d_name, '.');
+ snprintf(scripts_array[i],
+ (temp - script_dirent->d_name) + 1,
+ "%s", script_dirent->d_name);
+
+ if (check_ev_match(lang_dir_fd, scripts_array[i], session))
+ continue;
+
+ i++;
+ }
+ closedir(lang_dir);
+ }
+
+ closedir(scripts_dir);
+ perf_session__delete(session);
+ return i;
+}
+
/*
* When success, will copy the full path of the selected script
* into the buffer pointed by script_name, and return 0.
diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c
index 00adf872bf00..9712466c51e2 100644
--- a/tools/perf/util/path.c
+++ b/tools/perf/util/path.c
@@ -68,6 +68,16 @@ bool is_directory(const char *base_path, const struct dirent *dent)
return S_ISDIR(st.st_mode);
}
+bool is_directory_at(int dir_fd, const char *path)
+{
+ struct stat st;
+
+ if (fstatat(dir_fd, path, &st, /*flags=*/0))
+ return false;
+
+ return S_ISDIR(st.st_mode);
+}
+
bool is_executable_file(const char *base_path, const struct dirent *dent)
{
char path[PATH_MAX];
diff --git a/tools/perf/util/path.h b/tools/perf/util/path.h
index d94902c22222..fbafbe7015dd 100644
--- a/tools/perf/util/path.h
+++ b/tools/perf/util/path.h
@@ -12,6 +12,7 @@ int path__join3(char *bf, size_t size, const char *path1, const char *path2, con
bool is_regular_file(const char *file);
bool is_directory(const char *base_path, const struct dirent *dent);
+bool is_directory_at(int dir_fd, const char *path);
bool is_executable_file(const char *base_path, const struct dirent *dent);
#endif /* _PERF_PATH_H */
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index ab67abf3b607..5f11ae88943d 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -1306,12 +1306,6 @@ PyMODINIT_FUNC PyInit_perf(void)
/* The following are stubs to avoid dragging in builtin-* objects. */
/* TODO: move the code out of the builtin-* file into util. */
-int find_scripts(char **scripts_array __maybe_unused, char **scripts_path_array __maybe_unused,
- int num __maybe_unused, int pathlen __maybe_unused)
-{
- return -1;
-}
-
void perf_stat__set_no_csv_summary(int set __maybe_unused)
{
}
--
2.47.0.163.g1226f6d8fa-goog
On Wed, Oct 30, 2024 at 06:42:37PM -0700, Ian Rogers wrote: > The only use of find_scripts is in browser/scripts.c but the > definition in builtin causes linking problems requiring a stub in > python.c. Move the function to allow the stub to be removed. > > Rewrite the directory iteration to use openat so that large character > arrays aren't needed. The arrays are warned about potential buffer > overflows by GCC now that all the code exists in a single C file. Introducing is_directory_at() should be done as a prep patch, as the rest of the patch below could end up being reverted after some other patch used it, making the process more difficult. I mentioned cases like this in the past, so doing it again just for the record. - Arnaldo > Signed-off-by: Ian Rogers <irogers@google.com> > --- > tools/perf/builtin-script.c | 138 ------------------------ > tools/perf/builtin.h | 6 -- > tools/perf/ui/browsers/scripts.c | 177 ++++++++++++++++++++++++++++++- > tools/perf/util/path.c | 10 ++ > tools/perf/util/path.h | 1 + > tools/perf/util/python.c | 6 -- > 6 files changed, 186 insertions(+), 152 deletions(-) > > diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c > index 5d5a1a06d8c6..e9ec74056f71 100644 > --- a/tools/perf/builtin-script.c > +++ b/tools/perf/builtin-script.c > @@ -3521,144 +3521,6 @@ static void free_dlarg(void) > free(dlargv); > } > > -/* > - * Some scripts specify the required events in their "xxx-record" file, > - * this function will check if the events in perf.data match those > - * mentioned in the "xxx-record". > - * > - * Fixme: All existing "xxx-record" are all in good formats "-e event ", > - * which is covered well now. And new parsing code should be added to > - * cover the future complex formats like event groups etc. > - */ > -static int check_ev_match(char *dir_name, char *scriptname, > - struct perf_session *session) > -{ > - char filename[MAXPATHLEN], evname[128]; > - char line[BUFSIZ], *p; > - struct evsel *pos; > - int match, len; > - FILE *fp; > - > - scnprintf(filename, MAXPATHLEN, "%s/bin/%s-record", dir_name, scriptname); > - > - fp = fopen(filename, "r"); > - if (!fp) > - return -1; > - > - while (fgets(line, sizeof(line), fp)) { > - p = skip_spaces(line); > - if (*p == '#') > - continue; > - > - while (strlen(p)) { > - p = strstr(p, "-e"); > - if (!p) > - break; > - > - p += 2; > - p = skip_spaces(p); > - len = strcspn(p, " \t"); > - if (!len) > - break; > - > - snprintf(evname, len + 1, "%s", p); > - > - match = 0; > - evlist__for_each_entry(session->evlist, pos) { > - if (evsel__name_is(pos, evname)) { > - match = 1; > - break; > - } > - } > - > - if (!match) { > - fclose(fp); > - return -1; > - } > - } > - } > - > - fclose(fp); > - return 0; > -} > - > -/* > - * Return -1 if none is found, otherwise the actual scripts number. > - * > - * Currently the only user of this function is the script browser, which > - * will list all statically runnable scripts, select one, execute it and > - * show the output in a perf browser. > - */ > -int find_scripts(char **scripts_array, char **scripts_path_array, int num, > - int pathlen) > -{ > - struct dirent *script_dirent, *lang_dirent; > - char scripts_path[MAXPATHLEN], lang_path[MAXPATHLEN]; > - DIR *scripts_dir, *lang_dir; > - struct perf_session *session; > - struct perf_data data = { > - .path = input_name, > - .mode = PERF_DATA_MODE_READ, > - }; > - char *temp; > - int i = 0; > - > - session = perf_session__new(&data, NULL); > - if (IS_ERR(session)) > - return PTR_ERR(session); > - > - snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path()); > - > - scripts_dir = opendir(scripts_path); > - if (!scripts_dir) { > - perf_session__delete(session); > - return -1; > - } > - > - for_each_lang(scripts_path, scripts_dir, lang_dirent) { > - scnprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path, > - lang_dirent->d_name); > -#ifndef HAVE_LIBPERL_SUPPORT > - if (strstr(lang_path, "perl")) > - continue; > -#endif > -#ifndef HAVE_LIBPYTHON_SUPPORT > - if (strstr(lang_path, "python")) > - continue; > -#endif > - > - lang_dir = opendir(lang_path); > - if (!lang_dir) > - continue; > - > - for_each_script(lang_path, lang_dir, script_dirent) { > - /* Skip those real time scripts: xxxtop.p[yl] */ > - if (strstr(script_dirent->d_name, "top.")) > - continue; > - if (i >= num) > - break; > - snprintf(scripts_path_array[i], pathlen, "%s/%s", > - lang_path, > - script_dirent->d_name); > - temp = strchr(script_dirent->d_name, '.'); > - snprintf(scripts_array[i], > - (temp - script_dirent->d_name) + 1, > - "%s", script_dirent->d_name); > - > - if (check_ev_match(lang_path, > - scripts_array[i], session)) > - continue; > - > - i++; > - } > - closedir(lang_dir); > - } > - > - closedir(scripts_dir); > - perf_session__delete(session); > - return i; > -} > - > static char *get_script_path(const char *script_root, const char *suffix) > { > struct dirent *script_dirent, *lang_dirent; > diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h > index 94f4b3769bf7..a07e93c53848 100644 > --- a/tools/perf/builtin.h > +++ b/tools/perf/builtin.h > @@ -2,10 +2,6 @@ > #ifndef BUILTIN_H > #define BUILTIN_H > > -#include <stddef.h> > -#include <linux/compiler.h> > -#include <tools/config.h> > - > struct feature_status { > const char *name; > const char *macro; > @@ -56,6 +52,4 @@ int cmd_ftrace(int argc, const char **argv); > int cmd_daemon(int argc, const char **argv); > int cmd_kwork(int argc, const char **argv); > > -int find_scripts(char **scripts_array, char **scripts_path_array, int num, > - int pathlen); > #endif > diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c > index e437d7889de6..2d04ece833aa 100644 > --- a/tools/perf/ui/browsers/scripts.c > +++ b/tools/perf/ui/browsers/scripts.c > @@ -1,16 +1,18 @@ > // SPDX-License-Identifier: GPL-2.0 > -#include "../../builtin.h" > -#include "../../perf.h" > #include "../../util/util.h" // perf_exe() > #include "../util.h" > +#include "../../util/evlist.h" > #include "../../util/hist.h" > #include "../../util/debug.h" > +#include "../../util/session.h" > #include "../../util/symbol.h" > #include "../browser.h" > #include "../libslang.h" > #include "config.h" > +#include <linux/err.h> > #include <linux/string.h> > #include <linux/zalloc.h> > +#include <subcmd/exec-cmd.h> > #include <stdlib.h> > > #define SCRIPT_NAMELEN 128 > @@ -77,6 +79,177 @@ static int scripts_config(const char *var, const char *value, void *data) > return 0; > } > > +/* > + * Some scripts specify the required events in their "xxx-record" file, > + * this function will check if the events in perf.data match those > + * mentioned in the "xxx-record". > + * > + * Fixme: All existing "xxx-record" are all in good formats "-e event ", > + * which is covered well now. And new parsing code should be added to > + * cover the future complex formats like event groups etc. > + */ > +static int check_ev_match(int dir_fd, const char *scriptname, struct perf_session *session) > +{ > + char line[BUFSIZ]; > + FILE *fp; > + > + { > + char filename[FILENAME_MAX + 5]; > + int fd; > + > + scnprintf(filename, sizeof(filename), "bin/%s-record", scriptname); > + fd = openat(dir_fd, filename, O_RDONLY); > + if (fd == -1) > + return -1; > + fp = fdopen(fd, "r"); > + if (!fp) > + return -1; > + } > + > + while (fgets(line, sizeof(line), fp)) { > + char *p = skip_spaces(line); > + > + if (*p == '#') > + continue; > + > + while (strlen(p)) { > + int match, len; > + struct evsel *pos; > + char evname[128]; > + > + p = strstr(p, "-e"); > + if (!p) > + break; > + > + p += 2; > + p = skip_spaces(p); > + len = strcspn(p, " \t"); > + if (!len) > + break; > + > + snprintf(evname, len + 1, "%s", p); > + > + match = 0; > + evlist__for_each_entry(session->evlist, pos) { > + if (evsel__name_is(pos, evname)) { > + match = 1; > + break; > + } > + } > + > + if (!match) { > + fclose(fp); > + return -1; > + } > + } > + } > + > + fclose(fp); > + return 0; > +} > + > +/* > + * Return -1 if none is found, otherwise the actual scripts number. > + * > + * Currently the only user of this function is the script browser, which > + * will list all statically runnable scripts, select one, execute it and > + * show the output in a perf browser. > + */ > +static int find_scripts(char **scripts_array, char **scripts_path_array, int num, > + int pathlen) > +{ > + struct dirent *script_dirent, *lang_dirent; > + int scripts_dir_fd, lang_dir_fd; > + DIR *scripts_dir, *lang_dir; > + struct perf_session *session; > + struct perf_data data = { > + .path = input_name, > + .mode = PERF_DATA_MODE_READ, > + }; > + char *temp; > + int i = 0; > + const char *exec_path = get_argv_exec_path(); > + > + session = perf_session__new(&data, NULL); > + if (IS_ERR(session)) > + return PTR_ERR(session); > + > + { > + char scripts_path[PATH_MAX]; > + > + snprintf(scripts_path, sizeof(scripts_path), "%s/scripts", exec_path); > + scripts_dir_fd = open(scripts_path, O_DIRECTORY); > + pr_err("Failed to open directory '%s'", scripts_path); > + if (scripts_dir_fd == -1) { > + perf_session__delete(session); > + return -1; > + } > + } > + scripts_dir = fdopendir(scripts_dir_fd); > + if (!scripts_dir) { > + close(scripts_dir_fd); > + perf_session__delete(session); > + return -1; > + } > + > + while ((lang_dirent = readdir(scripts_dir)) != NULL) { > + if (lang_dirent->d_type != DT_DIR && > + (lang_dirent->d_type == DT_UNKNOWN && > + !is_directory_at(scripts_dir_fd, lang_dirent->d_name))) > + continue; > + if (!strcmp(lang_dirent->d_name, ".") || !strcmp(lang_dirent->d_name, "..")) > + continue; > + > +#ifndef HAVE_LIBPERL_SUPPORT > + if (strstr(lang_dirent->d_name, "perl")) > + continue; > +#endif > +#ifndef HAVE_LIBPYTHON_SUPPORT > + if (strstr(lang_dirent->d_name, "python")) > + continue; > +#endif > + > + lang_dir_fd = openat(scripts_dir_fd, lang_dirent->d_name, O_DIRECTORY); > + if (lang_dir_fd == -1) > + continue; > + lang_dir = fdopendir(lang_dir_fd); > + if (!lang_dir) { > + close(lang_dir_fd); > + continue; > + } > + while ((script_dirent = readdir(lang_dir)) != NULL) { > + if (script_dirent->d_type == DT_DIR) > + continue; > + if (script_dirent->d_type == DT_UNKNOWN && > + is_directory_at(lang_dir_fd, script_dirent->d_name)) > + continue; > + /* Skip those real time scripts: xxxtop.p[yl] */ > + if (strstr(script_dirent->d_name, "top.")) > + continue; > + if (i >= num) > + break; > + scnprintf(scripts_path_array[i], pathlen, "%s/scripts/%s/%s", > + exec_path, > + lang_dirent->d_name, > + script_dirent->d_name); > + temp = strchr(script_dirent->d_name, '.'); > + snprintf(scripts_array[i], > + (temp - script_dirent->d_name) + 1, > + "%s", script_dirent->d_name); > + > + if (check_ev_match(lang_dir_fd, scripts_array[i], session)) > + continue; > + > + i++; > + } > + closedir(lang_dir); > + } > + > + closedir(scripts_dir); > + perf_session__delete(session); > + return i; > +} > + > /* > * When success, will copy the full path of the selected script > * into the buffer pointed by script_name, and return 0. > diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c > index 00adf872bf00..9712466c51e2 100644 > --- a/tools/perf/util/path.c > +++ b/tools/perf/util/path.c > @@ -68,6 +68,16 @@ bool is_directory(const char *base_path, const struct dirent *dent) > return S_ISDIR(st.st_mode); > } > > +bool is_directory_at(int dir_fd, const char *path) > +{ > + struct stat st; > + > + if (fstatat(dir_fd, path, &st, /*flags=*/0)) > + return false; > + > + return S_ISDIR(st.st_mode); > +} > + > bool is_executable_file(const char *base_path, const struct dirent *dent) > { > char path[PATH_MAX]; > diff --git a/tools/perf/util/path.h b/tools/perf/util/path.h > index d94902c22222..fbafbe7015dd 100644 > --- a/tools/perf/util/path.h > +++ b/tools/perf/util/path.h > @@ -12,6 +12,7 @@ int path__join3(char *bf, size_t size, const char *path1, const char *path2, con > > bool is_regular_file(const char *file); > bool is_directory(const char *base_path, const struct dirent *dent); > +bool is_directory_at(int dir_fd, const char *path); > bool is_executable_file(const char *base_path, const struct dirent *dent); > > #endif /* _PERF_PATH_H */ > diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c > index ab67abf3b607..5f11ae88943d 100644 > --- a/tools/perf/util/python.c > +++ b/tools/perf/util/python.c > @@ -1306,12 +1306,6 @@ PyMODINIT_FUNC PyInit_perf(void) > /* The following are stubs to avoid dragging in builtin-* objects. */ > /* TODO: move the code out of the builtin-* file into util. */ > > -int find_scripts(char **scripts_array __maybe_unused, char **scripts_path_array __maybe_unused, > - int num __maybe_unused, int pathlen __maybe_unused) > -{ > - return -1; > -} > - > void perf_stat__set_no_csv_summary(int set __maybe_unused) > { > } > -- > 2.47.0.163.g1226f6d8fa-goog >
On Thu, Oct 31, 2024 at 12:18 PM Arnaldo Carvalho de Melo <acme@kernel.org> wrote: > > On Wed, Oct 30, 2024 at 06:42:37PM -0700, Ian Rogers wrote: > > The only use of find_scripts is in browser/scripts.c but the > > definition in builtin causes linking problems requiring a stub in > > python.c. Move the function to allow the stub to be removed. > > > > Rewrite the directory iteration to use openat so that large character > > arrays aren't needed. The arrays are warned about potential buffer > > overflows by GCC now that all the code exists in a single C file. > > Introducing is_directory_at() should be done as a prep patch, as the > rest of the patch below could end up being reverted after some other > patch used it, making the process more difficult. > > I mentioned cases like this in the past, so doing it again just for the > record. This is highlighted in the commit message: ``` Rewrite the directory iteration to use openat so that large character arrays aren't needed. The arrays are warned about potential buffer overflows by GCC now that all the code exists in a single C file. ``` so without the change the code wouldn't build. The new is_directory_at function is effectively 2 statements fstatat and S_ISDIR on the result, it is put next to is_directory for consistency but could have been a static function in the only C file to use it. For the record, patches introducing 2 line long functions can be excessively noisy, especially in a 21 patch series. There is always the declared but not used build error to worry about - here things couldn't just be simply moved due to triggering a different build error. Given the simplicity of the function here I made a decision not to split up the work - the commit message would likely be longer than the function. The work never intended to introduce is_directory_at but was forced into it through a desire not to disable compiler warnings. Thanks, Ian > > - Arnaldo > > > Signed-off-by: Ian Rogers <irogers@google.com> > > --- > > tools/perf/builtin-script.c | 138 ------------------------ > > tools/perf/builtin.h | 6 -- > > tools/perf/ui/browsers/scripts.c | 177 ++++++++++++++++++++++++++++++- > > tools/perf/util/path.c | 10 ++ > > tools/perf/util/path.h | 1 + > > tools/perf/util/python.c | 6 -- > > 6 files changed, 186 insertions(+), 152 deletions(-) > > > > diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c > > index 5d5a1a06d8c6..e9ec74056f71 100644 > > --- a/tools/perf/builtin-script.c > > +++ b/tools/perf/builtin-script.c > > @@ -3521,144 +3521,6 @@ static void free_dlarg(void) > > free(dlargv); > > } > > > > -/* > > - * Some scripts specify the required events in their "xxx-record" file, > > - * this function will check if the events in perf.data match those > > - * mentioned in the "xxx-record". > > - * > > - * Fixme: All existing "xxx-record" are all in good formats "-e event ", > > - * which is covered well now. And new parsing code should be added to > > - * cover the future complex formats like event groups etc. > > - */ > > -static int check_ev_match(char *dir_name, char *scriptname, > > - struct perf_session *session) > > -{ > > - char filename[MAXPATHLEN], evname[128]; > > - char line[BUFSIZ], *p; > > - struct evsel *pos; > > - int match, len; > > - FILE *fp; > > - > > - scnprintf(filename, MAXPATHLEN, "%s/bin/%s-record", dir_name, scriptname); > > - > > - fp = fopen(filename, "r"); > > - if (!fp) > > - return -1; > > - > > - while (fgets(line, sizeof(line), fp)) { > > - p = skip_spaces(line); > > - if (*p == '#') > > - continue; > > - > > - while (strlen(p)) { > > - p = strstr(p, "-e"); > > - if (!p) > > - break; > > - > > - p += 2; > > - p = skip_spaces(p); > > - len = strcspn(p, " \t"); > > - if (!len) > > - break; > > - > > - snprintf(evname, len + 1, "%s", p); > > - > > - match = 0; > > - evlist__for_each_entry(session->evlist, pos) { > > - if (evsel__name_is(pos, evname)) { > > - match = 1; > > - break; > > - } > > - } > > - > > - if (!match) { > > - fclose(fp); > > - return -1; > > - } > > - } > > - } > > - > > - fclose(fp); > > - return 0; > > -} > > - > > -/* > > - * Return -1 if none is found, otherwise the actual scripts number. > > - * > > - * Currently the only user of this function is the script browser, which > > - * will list all statically runnable scripts, select one, execute it and > > - * show the output in a perf browser. > > - */ > > -int find_scripts(char **scripts_array, char **scripts_path_array, int num, > > - int pathlen) > > -{ > > - struct dirent *script_dirent, *lang_dirent; > > - char scripts_path[MAXPATHLEN], lang_path[MAXPATHLEN]; > > - DIR *scripts_dir, *lang_dir; > > - struct perf_session *session; > > - struct perf_data data = { > > - .path = input_name, > > - .mode = PERF_DATA_MODE_READ, > > - }; > > - char *temp; > > - int i = 0; > > - > > - session = perf_session__new(&data, NULL); > > - if (IS_ERR(session)) > > - return PTR_ERR(session); > > - > > - snprintf(scripts_path, MAXPATHLEN, "%s/scripts", get_argv_exec_path()); > > - > > - scripts_dir = opendir(scripts_path); > > - if (!scripts_dir) { > > - perf_session__delete(session); > > - return -1; > > - } > > - > > - for_each_lang(scripts_path, scripts_dir, lang_dirent) { > > - scnprintf(lang_path, MAXPATHLEN, "%s/%s", scripts_path, > > - lang_dirent->d_name); > > -#ifndef HAVE_LIBPERL_SUPPORT > > - if (strstr(lang_path, "perl")) > > - continue; > > -#endif > > -#ifndef HAVE_LIBPYTHON_SUPPORT > > - if (strstr(lang_path, "python")) > > - continue; > > -#endif > > - > > - lang_dir = opendir(lang_path); > > - if (!lang_dir) > > - continue; > > - > > - for_each_script(lang_path, lang_dir, script_dirent) { > > - /* Skip those real time scripts: xxxtop.p[yl] */ > > - if (strstr(script_dirent->d_name, "top.")) > > - continue; > > - if (i >= num) > > - break; > > - snprintf(scripts_path_array[i], pathlen, "%s/%s", > > - lang_path, > > - script_dirent->d_name); > > - temp = strchr(script_dirent->d_name, '.'); > > - snprintf(scripts_array[i], > > - (temp - script_dirent->d_name) + 1, > > - "%s", script_dirent->d_name); > > - > > - if (check_ev_match(lang_path, > > - scripts_array[i], session)) > > - continue; > > - > > - i++; > > - } > > - closedir(lang_dir); > > - } > > - > > - closedir(scripts_dir); > > - perf_session__delete(session); > > - return i; > > -} > > - > > static char *get_script_path(const char *script_root, const char *suffix) > > { > > struct dirent *script_dirent, *lang_dirent; > > diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h > > index 94f4b3769bf7..a07e93c53848 100644 > > --- a/tools/perf/builtin.h > > +++ b/tools/perf/builtin.h > > @@ -2,10 +2,6 @@ > > #ifndef BUILTIN_H > > #define BUILTIN_H > > > > -#include <stddef.h> > > -#include <linux/compiler.h> > > -#include <tools/config.h> > > - > > struct feature_status { > > const char *name; > > const char *macro; > > @@ -56,6 +52,4 @@ int cmd_ftrace(int argc, const char **argv); > > int cmd_daemon(int argc, const char **argv); > > int cmd_kwork(int argc, const char **argv); > > > > -int find_scripts(char **scripts_array, char **scripts_path_array, int num, > > - int pathlen); > > #endif > > diff --git a/tools/perf/ui/browsers/scripts.c b/tools/perf/ui/browsers/scripts.c > > index e437d7889de6..2d04ece833aa 100644 > > --- a/tools/perf/ui/browsers/scripts.c > > +++ b/tools/perf/ui/browsers/scripts.c > > @@ -1,16 +1,18 @@ > > // SPDX-License-Identifier: GPL-2.0 > > -#include "../../builtin.h" > > -#include "../../perf.h" > > #include "../../util/util.h" // perf_exe() > > #include "../util.h" > > +#include "../../util/evlist.h" > > #include "../../util/hist.h" > > #include "../../util/debug.h" > > +#include "../../util/session.h" > > #include "../../util/symbol.h" > > #include "../browser.h" > > #include "../libslang.h" > > #include "config.h" > > +#include <linux/err.h> > > #include <linux/string.h> > > #include <linux/zalloc.h> > > +#include <subcmd/exec-cmd.h> > > #include <stdlib.h> > > > > #define SCRIPT_NAMELEN 128 > > @@ -77,6 +79,177 @@ static int scripts_config(const char *var, const char *value, void *data) > > return 0; > > } > > > > +/* > > + * Some scripts specify the required events in their "xxx-record" file, > > + * this function will check if the events in perf.data match those > > + * mentioned in the "xxx-record". > > + * > > + * Fixme: All existing "xxx-record" are all in good formats "-e event ", > > + * which is covered well now. And new parsing code should be added to > > + * cover the future complex formats like event groups etc. > > + */ > > +static int check_ev_match(int dir_fd, const char *scriptname, struct perf_session *session) > > +{ > > + char line[BUFSIZ]; > > + FILE *fp; > > + > > + { > > + char filename[FILENAME_MAX + 5]; > > + int fd; > > + > > + scnprintf(filename, sizeof(filename), "bin/%s-record", scriptname); > > + fd = openat(dir_fd, filename, O_RDONLY); > > + if (fd == -1) > > + return -1; > > + fp = fdopen(fd, "r"); > > + if (!fp) > > + return -1; > > + } > > + > > + while (fgets(line, sizeof(line), fp)) { > > + char *p = skip_spaces(line); > > + > > + if (*p == '#') > > + continue; > > + > > + while (strlen(p)) { > > + int match, len; > > + struct evsel *pos; > > + char evname[128]; > > + > > + p = strstr(p, "-e"); > > + if (!p) > > + break; > > + > > + p += 2; > > + p = skip_spaces(p); > > + len = strcspn(p, " \t"); > > + if (!len) > > + break; > > + > > + snprintf(evname, len + 1, "%s", p); > > + > > + match = 0; > > + evlist__for_each_entry(session->evlist, pos) { > > + if (evsel__name_is(pos, evname)) { > > + match = 1; > > + break; > > + } > > + } > > + > > + if (!match) { > > + fclose(fp); > > + return -1; > > + } > > + } > > + } > > + > > + fclose(fp); > > + return 0; > > +} > > + > > +/* > > + * Return -1 if none is found, otherwise the actual scripts number. > > + * > > + * Currently the only user of this function is the script browser, which > > + * will list all statically runnable scripts, select one, execute it and > > + * show the output in a perf browser. > > + */ > > +static int find_scripts(char **scripts_array, char **scripts_path_array, int num, > > + int pathlen) > > +{ > > + struct dirent *script_dirent, *lang_dirent; > > + int scripts_dir_fd, lang_dir_fd; > > + DIR *scripts_dir, *lang_dir; > > + struct perf_session *session; > > + struct perf_data data = { > > + .path = input_name, > > + .mode = PERF_DATA_MODE_READ, > > + }; > > + char *temp; > > + int i = 0; > > + const char *exec_path = get_argv_exec_path(); > > + > > + session = perf_session__new(&data, NULL); > > + if (IS_ERR(session)) > > + return PTR_ERR(session); > > + > > + { > > + char scripts_path[PATH_MAX]; > > + > > + snprintf(scripts_path, sizeof(scripts_path), "%s/scripts", exec_path); > > + scripts_dir_fd = open(scripts_path, O_DIRECTORY); > > + pr_err("Failed to open directory '%s'", scripts_path); > > + if (scripts_dir_fd == -1) { > > + perf_session__delete(session); > > + return -1; > > + } > > + } > > + scripts_dir = fdopendir(scripts_dir_fd); > > + if (!scripts_dir) { > > + close(scripts_dir_fd); > > + perf_session__delete(session); > > + return -1; > > + } > > + > > + while ((lang_dirent = readdir(scripts_dir)) != NULL) { > > + if (lang_dirent->d_type != DT_DIR && > > + (lang_dirent->d_type == DT_UNKNOWN && > > + !is_directory_at(scripts_dir_fd, lang_dirent->d_name))) > > + continue; > > + if (!strcmp(lang_dirent->d_name, ".") || !strcmp(lang_dirent->d_name, "..")) > > + continue; > > + > > +#ifndef HAVE_LIBPERL_SUPPORT > > + if (strstr(lang_dirent->d_name, "perl")) > > + continue; > > +#endif > > +#ifndef HAVE_LIBPYTHON_SUPPORT > > + if (strstr(lang_dirent->d_name, "python")) > > + continue; > > +#endif > > + > > + lang_dir_fd = openat(scripts_dir_fd, lang_dirent->d_name, O_DIRECTORY); > > + if (lang_dir_fd == -1) > > + continue; > > + lang_dir = fdopendir(lang_dir_fd); > > + if (!lang_dir) { > > + close(lang_dir_fd); > > + continue; > > + } > > + while ((script_dirent = readdir(lang_dir)) != NULL) { > > + if (script_dirent->d_type == DT_DIR) > > + continue; > > + if (script_dirent->d_type == DT_UNKNOWN && > > + is_directory_at(lang_dir_fd, script_dirent->d_name)) > > + continue; > > + /* Skip those real time scripts: xxxtop.p[yl] */ > > + if (strstr(script_dirent->d_name, "top.")) > > + continue; > > + if (i >= num) > > + break; > > + scnprintf(scripts_path_array[i], pathlen, "%s/scripts/%s/%s", > > + exec_path, > > + lang_dirent->d_name, > > + script_dirent->d_name); > > + temp = strchr(script_dirent->d_name, '.'); > > + snprintf(scripts_array[i], > > + (temp - script_dirent->d_name) + 1, > > + "%s", script_dirent->d_name); > > + > > + if (check_ev_match(lang_dir_fd, scripts_array[i], session)) > > + continue; > > + > > + i++; > > + } > > + closedir(lang_dir); > > + } > > + > > + closedir(scripts_dir); > > + perf_session__delete(session); > > + return i; > > +} > > + > > /* > > * When success, will copy the full path of the selected script > > * into the buffer pointed by script_name, and return 0. > > diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c > > index 00adf872bf00..9712466c51e2 100644 > > --- a/tools/perf/util/path.c > > +++ b/tools/perf/util/path.c > > @@ -68,6 +68,16 @@ bool is_directory(const char *base_path, const struct dirent *dent) > > return S_ISDIR(st.st_mode); > > } > > > > +bool is_directory_at(int dir_fd, const char *path) > > +{ > > + struct stat st; > > + > > + if (fstatat(dir_fd, path, &st, /*flags=*/0)) > > + return false; > > + > > + return S_ISDIR(st.st_mode); > > +} > > + > > bool is_executable_file(const char *base_path, const struct dirent *dent) > > { > > char path[PATH_MAX]; > > diff --git a/tools/perf/util/path.h b/tools/perf/util/path.h > > index d94902c22222..fbafbe7015dd 100644 > > --- a/tools/perf/util/path.h > > +++ b/tools/perf/util/path.h > > @@ -12,6 +12,7 @@ int path__join3(char *bf, size_t size, const char *path1, const char *path2, con > > > > bool is_regular_file(const char *file); > > bool is_directory(const char *base_path, const struct dirent *dent); > > +bool is_directory_at(int dir_fd, const char *path); > > bool is_executable_file(const char *base_path, const struct dirent *dent); > > > > #endif /* _PERF_PATH_H */ > > diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c > > index ab67abf3b607..5f11ae88943d 100644 > > --- a/tools/perf/util/python.c > > +++ b/tools/perf/util/python.c > > @@ -1306,12 +1306,6 @@ PyMODINIT_FUNC PyInit_perf(void) > > /* The following are stubs to avoid dragging in builtin-* objects. */ > > /* TODO: move the code out of the builtin-* file into util. */ > > > > -int find_scripts(char **scripts_array __maybe_unused, char **scripts_path_array __maybe_unused, > > - int num __maybe_unused, int pathlen __maybe_unused) > > -{ > > - return -1; > > -} > > - > > void perf_stat__set_no_csv_summary(int set __maybe_unused) > > { > > } > > -- > > 2.47.0.163.g1226f6d8fa-goog > >
On Thu, Oct 31, 2024 at 01:51:36PM -0700, Ian Rogers wrote: > On Thu, Oct 31, 2024 at 12:18 PM Arnaldo Carvalho de Melo > <acme@kernel.org> wrote: > > > > On Wed, Oct 30, 2024 at 06:42:37PM -0700, Ian Rogers wrote: > > > The only use of find_scripts is in browser/scripts.c but the > > > definition in builtin causes linking problems requiring a stub in > > > python.c. Move the function to allow the stub to be removed. > > > > > > Rewrite the directory iteration to use openat so that large character > > > arrays aren't needed. The arrays are warned about potential buffer > > > overflows by GCC now that all the code exists in a single C file. > > > > Introducing is_directory_at() should be done as a prep patch, as the > > rest of the patch below could end up being reverted after some other > > patch used it, making the process more difficult. > > > > I mentioned cases like this in the past, so doing it again just for the > > record. > > This is highlighted in the commit message: > ``` > Rewrite the directory iteration to use openat so that large character > arrays aren't needed. The arrays are warned about potential buffer > overflows by GCC now that all the code exists in a single C file. > ``` > so without the change the code wouldn't build. The new is_directory_at > function is effectively 2 statements fstatat and S_ISDIR on the > result, it is put next to is_directory for consistency but could have > been a static function in the only C file to use it. > > For the record, patches introducing 2 line long functions can be > excessively noisy, especially in a 21 patch series. There is always > the declared but not used build error to worry about - here things > couldn't just be simply moved due to triggering a different build > error. Given the simplicity of the function here I made a decision not > to split up the work - the commit message would likely be longer than > the function. The work never intended to introduce is_directory_at but > was forced into it through a desire not to disable compiler warnings. This patch does more than just moving the code which can be easy to miss something in the middle. I think you can move the code as is without introducing build errors and then add new changes like using openat() on top (you may separate the change out of this series). I think it's ok to have a small change if it clearly has different semantics. Thanks, Namhyung
On Mon, Nov 4, 2024 at 11:47 AM Namhyung Kim <namhyung@kernel.org> wrote: > > On Thu, Oct 31, 2024 at 01:51:36PM -0700, Ian Rogers wrote: > > On Thu, Oct 31, 2024 at 12:18 PM Arnaldo Carvalho de Melo > > <acme@kernel.org> wrote: > > > > > > On Wed, Oct 30, 2024 at 06:42:37PM -0700, Ian Rogers wrote: > > > > The only use of find_scripts is in browser/scripts.c but the > > > > definition in builtin causes linking problems requiring a stub in > > > > python.c. Move the function to allow the stub to be removed. > > > > > > > > Rewrite the directory iteration to use openat so that large character > > > > arrays aren't needed. The arrays are warned about potential buffer > > > > overflows by GCC now that all the code exists in a single C file. > > > > > > Introducing is_directory_at() should be done as a prep patch, as the > > > rest of the patch below could end up being reverted after some other > > > patch used it, making the process more difficult. > > > > > > I mentioned cases like this in the past, so doing it again just for the > > > record. > > > > This is highlighted in the commit message: > > ``` > > Rewrite the directory iteration to use openat so that large character > > arrays aren't needed. The arrays are warned about potential buffer > > overflows by GCC now that all the code exists in a single C file. > > ``` > > so without the change the code wouldn't build. The new is_directory_at > > function is effectively 2 statements fstatat and S_ISDIR on the > > result, it is put next to is_directory for consistency but could have > > been a static function in the only C file to use it. > > > > For the record, patches introducing 2 line long functions can be > > excessively noisy, especially in a 21 patch series. There is always > > the declared but not used build error to worry about - here things > > couldn't just be simply moved due to triggering a different build > > error. Given the simplicity of the function here I made a decision not > > to split up the work - the commit message would likely be longer than > > the function. The work never intended to introduce is_directory_at but > > was forced into it through a desire not to disable compiler warnings. > > This patch does more than just moving the code which can be easy to miss > something in the middle. I think you can move the code as is without > introducing build errors and then add new changes like using openat() on > top (you may separate the change out of this series). I think it's > ok to have a small change if it clearly has different semantics. If you are trying to bisect to find something that broke a build, having commits that knowingly break the build will cause the bisect to fail. The bisect will falsely fail on the known to be broken commit. It should be unacceptable to knowingly break the build in a commit for this reason. Thanks, Ian
On Mon, Nov 04, 2024 at 12:34:47PM -0800, Ian Rogers wrote: > On Mon, Nov 4, 2024 at 11:47 AM Namhyung Kim <namhyung@kernel.org> wrote: > > > > On Thu, Oct 31, 2024 at 01:51:36PM -0700, Ian Rogers wrote: > > > On Thu, Oct 31, 2024 at 12:18 PM Arnaldo Carvalho de Melo > > > <acme@kernel.org> wrote: > > > > > > > > On Wed, Oct 30, 2024 at 06:42:37PM -0700, Ian Rogers wrote: > > > > > The only use of find_scripts is in browser/scripts.c but the > > > > > definition in builtin causes linking problems requiring a stub in > > > > > python.c. Move the function to allow the stub to be removed. > > > > > > > > > > Rewrite the directory iteration to use openat so that large character > > > > > arrays aren't needed. The arrays are warned about potential buffer > > > > > overflows by GCC now that all the code exists in a single C file. > > > > > > > > Introducing is_directory_at() should be done as a prep patch, as the > > > > rest of the patch below could end up being reverted after some other > > > > patch used it, making the process more difficult. > > > > > > > > I mentioned cases like this in the past, so doing it again just for the > > > > record. > > > > > > This is highlighted in the commit message: > > > ``` > > > Rewrite the directory iteration to use openat so that large character > > > arrays aren't needed. The arrays are warned about potential buffer > > > overflows by GCC now that all the code exists in a single C file. > > > ``` > > > so without the change the code wouldn't build. The new is_directory_at > > > function is effectively 2 statements fstatat and S_ISDIR on the > > > result, it is put next to is_directory for consistency but could have > > > been a static function in the only C file to use it. > > > > > > For the record, patches introducing 2 line long functions can be > > > excessively noisy, especially in a 21 patch series. There is always > > > the declared but not used build error to worry about - here things > > > couldn't just be simply moved due to triggering a different build > > > error. Given the simplicity of the function here I made a decision not > > > to split up the work - the commit message would likely be longer than > > > the function. The work never intended to introduce is_directory_at but > > > was forced into it through a desire not to disable compiler warnings. > > > > This patch does more than just moving the code which can be easy to miss > > something in the middle. I think you can move the code as is without > > introducing build errors and then add new changes like using openat() on > > top (you may separate the change out of this series). I think it's > > ok to have a small change if it clearly has different semantics. > > If you are trying to bisect to find something that broke a build, > having commits that knowingly break the build will cause the bisect to > fail. The bisect will falsely fail on the known to be broken commit. I'm not understanding, AFAIK nobody is advocating for breaking bisection, just to first instroduce a function, then use it to avoid: 1) Introduce function foo() and use it for feature bar() 2) Somebody else uses function foo() 3) We find a justification to revert 1) but can't, since it will break 2) so we need to add 4) that removes bar() from 1). - Arnaldo > It should be unacceptable to knowingly break the build in a commit for > this reason. > > Thanks, > Ian
On Mon, Nov 4, 2024 at 12:39 PM Arnaldo Carvalho de Melo <acme@kernel.org> wrote: > > On Mon, Nov 04, 2024 at 12:34:47PM -0800, Ian Rogers wrote: > > On Mon, Nov 4, 2024 at 11:47 AM Namhyung Kim <namhyung@kernel.org> wrote: > > > > > > On Thu, Oct 31, 2024 at 01:51:36PM -0700, Ian Rogers wrote: > > > > On Thu, Oct 31, 2024 at 12:18 PM Arnaldo Carvalho de Melo > > > > <acme@kernel.org> wrote: > > > > > > > > > > On Wed, Oct 30, 2024 at 06:42:37PM -0700, Ian Rogers wrote: > > > > > > The only use of find_scripts is in browser/scripts.c but the > > > > > > definition in builtin causes linking problems requiring a stub in > > > > > > python.c. Move the function to allow the stub to be removed. > > > > > > > > > > > > Rewrite the directory iteration to use openat so that large character > > > > > > arrays aren't needed. The arrays are warned about potential buffer > > > > > > overflows by GCC now that all the code exists in a single C file. > > > > > > > > > > Introducing is_directory_at() should be done as a prep patch, as the > > > > > rest of the patch below could end up being reverted after some other > > > > > patch used it, making the process more difficult. > > > > > > > > > > I mentioned cases like this in the past, so doing it again just for the > > > > > record. > > > > > > > > This is highlighted in the commit message: > > > > ``` > > > > Rewrite the directory iteration to use openat so that large character > > > > arrays aren't needed. The arrays are warned about potential buffer > > > > overflows by GCC now that all the code exists in a single C file. > > > > ``` > > > > so without the change the code wouldn't build. The new is_directory_at > > > > function is effectively 2 statements fstatat and S_ISDIR on the > > > > result, it is put next to is_directory for consistency but could have > > > > been a static function in the only C file to use it. > > > > > > > > For the record, patches introducing 2 line long functions can be > > > > excessively noisy, especially in a 21 patch series. There is always > > > > the declared but not used build error to worry about - here things > > > > couldn't just be simply moved due to triggering a different build > > > > error. Given the simplicity of the function here I made a decision not > > > > to split up the work - the commit message would likely be longer than > > > > the function. The work never intended to introduce is_directory_at but > > > > was forced into it through a desire not to disable compiler warnings. > > > > > > This patch does more than just moving the code which can be easy to miss > > > something in the middle. I think you can move the code as is without > > > introducing build errors and then add new changes like using openat() on > > > top (you may separate the change out of this series). I think it's > > > ok to have a small change if it clearly has different semantics. > > > > If you are trying to bisect to find something that broke a build, > > having commits that knowingly break the build will cause the bisect to > > fail. The bisect will falsely fail on the known to be broken commit. > > I'm not understanding, AFAIK nobody is advocating for breaking > bisection, just to first instroduce a function, then use it to avoid: > > 1) Introduce function foo() and use it for feature bar() > 2) Somebody else uses function foo() > 3) We find a justification to revert 1) but can't, since it will break > 2) so we need to add 4) that removes bar() from 1). Namhyung was asking that the c&p of code be 1 patch then "add new changes like using openat() on top". That is: patch 1: add is_directory_at - introduce the 2 line helper function patch 2: move the code patch 3: update the code to use is_directory_at patch 2 is known broken as patch 3 is fixing it. Hopefully this is clear. Thanks, Ian > > > It should be unacceptable to knowingly break the build in a commit for > > this reason. > > > > Thanks, > > Ian
On Mon, Nov 04, 2024 at 12:48:01PM -0800, Ian Rogers wrote: > On Mon, Nov 4, 2024 at 12:39 PM Arnaldo Carvalho de Melo > <acme@kernel.org> wrote: > > > > On Mon, Nov 04, 2024 at 12:34:47PM -0800, Ian Rogers wrote: > > > On Mon, Nov 4, 2024 at 11:47 AM Namhyung Kim <namhyung@kernel.org> wrote: > > > > > > > > On Thu, Oct 31, 2024 at 01:51:36PM -0700, Ian Rogers wrote: > > > > > On Thu, Oct 31, 2024 at 12:18 PM Arnaldo Carvalho de Melo > > > > > <acme@kernel.org> wrote: > > > > > > > > > > > > On Wed, Oct 30, 2024 at 06:42:37PM -0700, Ian Rogers wrote: > > > > > > > The only use of find_scripts is in browser/scripts.c but the > > > > > > > definition in builtin causes linking problems requiring a stub in > > > > > > > python.c. Move the function to allow the stub to be removed. > > > > > > > > > > > > > > Rewrite the directory iteration to use openat so that large character > > > > > > > arrays aren't needed. The arrays are warned about potential buffer > > > > > > > overflows by GCC now that all the code exists in a single C file. > > > > > > > > > > > > Introducing is_directory_at() should be done as a prep patch, as the > > > > > > rest of the patch below could end up being reverted after some other > > > > > > patch used it, making the process more difficult. > > > > > > > > > > > > I mentioned cases like this in the past, so doing it again just for the > > > > > > record. > > > > > > > > > > This is highlighted in the commit message: > > > > > ``` > > > > > Rewrite the directory iteration to use openat so that large character > > > > > arrays aren't needed. The arrays are warned about potential buffer > > > > > overflows by GCC now that all the code exists in a single C file. > > > > > ``` > > > > > so without the change the code wouldn't build. The new is_directory_at > > > > > function is effectively 2 statements fstatat and S_ISDIR on the > > > > > result, it is put next to is_directory for consistency but could have > > > > > been a static function in the only C file to use it. > > > > > > > > > > For the record, patches introducing 2 line long functions can be > > > > > excessively noisy, especially in a 21 patch series. There is always > > > > > the declared but not used build error to worry about - here things > > > > > couldn't just be simply moved due to triggering a different build > > > > > error. Given the simplicity of the function here I made a decision not > > > > > to split up the work - the commit message would likely be longer than > > > > > the function. The work never intended to introduce is_directory_at but > > > > > was forced into it through a desire not to disable compiler warnings. > > > > > > > > This patch does more than just moving the code which can be easy to miss > > > > something in the middle. I think you can move the code as is without > > > > introducing build errors and then add new changes like using openat() on > > > > top (you may separate the change out of this series). I think it's > > > > ok to have a small change if it clearly has different semantics. > > > > > > If you are trying to bisect to find something that broke a build, > > > having commits that knowingly break the build will cause the bisect to > > > fail. The bisect will falsely fail on the known to be broken commit. > > > > I'm not understanding, AFAIK nobody is advocating for breaking > > bisection, just to first instroduce a function, then use it to avoid: > > > > 1) Introduce function foo() and use it for feature bar() > > 2) Somebody else uses function foo() > > 3) We find a justification to revert 1) but can't, since it will break > > 2) so we need to add 4) that removes bar() from 1). > > Namhyung was asking that the c&p of code be 1 patch then "add new > changes like using openat() on top". That is: > > patch 1: add is_directory_at - introduce the 2 line helper function > patch 2: move the code > patch 3: update the code to use is_directory_at > > patch 2 is known broken as patch 3 is fixing it. > > Hopefully this is clear. Actually I don't care about the patch ordering. My request is not to break build and just to separate different changes out. :) Thanks, Namhyung
On Mon, Nov 4, 2024 at 1:00 PM Namhyung Kim <namhyung@kernel.org> wrote: > > On Mon, Nov 04, 2024 at 12:48:01PM -0800, Ian Rogers wrote: > > On Mon, Nov 4, 2024 at 12:39 PM Arnaldo Carvalho de Melo > > <acme@kernel.org> wrote: > > > > > > On Mon, Nov 04, 2024 at 12:34:47PM -0800, Ian Rogers wrote: > > > > On Mon, Nov 4, 2024 at 11:47 AM Namhyung Kim <namhyung@kernel.org> wrote: > > > > > > > > > > On Thu, Oct 31, 2024 at 01:51:36PM -0700, Ian Rogers wrote: > > > > > > On Thu, Oct 31, 2024 at 12:18 PM Arnaldo Carvalho de Melo > > > > > > <acme@kernel.org> wrote: > > > > > > > > > > > > > > On Wed, Oct 30, 2024 at 06:42:37PM -0700, Ian Rogers wrote: > > > > > > > > The only use of find_scripts is in browser/scripts.c but the > > > > > > > > definition in builtin causes linking problems requiring a stub in > > > > > > > > python.c. Move the function to allow the stub to be removed. > > > > > > > > > > > > > > > > Rewrite the directory iteration to use openat so that large character > > > > > > > > arrays aren't needed. The arrays are warned about potential buffer > > > > > > > > overflows by GCC now that all the code exists in a single C file. > > > > > > > > > > > > > > Introducing is_directory_at() should be done as a prep patch, as the > > > > > > > rest of the patch below could end up being reverted after some other > > > > > > > patch used it, making the process more difficult. > > > > > > > > > > > > > > I mentioned cases like this in the past, so doing it again just for the > > > > > > > record. > > > > > > > > > > > > This is highlighted in the commit message: > > > > > > ``` > > > > > > Rewrite the directory iteration to use openat so that large character > > > > > > arrays aren't needed. The arrays are warned about potential buffer > > > > > > overflows by GCC now that all the code exists in a single C file. > > > > > > ``` > > > > > > so without the change the code wouldn't build. The new is_directory_at > > > > > > function is effectively 2 statements fstatat and S_ISDIR on the > > > > > > result, it is put next to is_directory for consistency but could have > > > > > > been a static function in the only C file to use it. > > > > > > > > > > > > For the record, patches introducing 2 line long functions can be > > > > > > excessively noisy, especially in a 21 patch series. There is always > > > > > > the declared but not used build error to worry about - here things > > > > > > couldn't just be simply moved due to triggering a different build > > > > > > error. Given the simplicity of the function here I made a decision not > > > > > > to split up the work - the commit message would likely be longer than > > > > > > the function. The work never intended to introduce is_directory_at but > > > > > > was forced into it through a desire not to disable compiler warnings. > > > > > > > > > > This patch does more than just moving the code which can be easy to miss > > > > > something in the middle. I think you can move the code as is without > > > > > introducing build errors and then add new changes like using openat() on > > > > > top (you may separate the change out of this series). I think it's > > > > > ok to have a small change if it clearly has different semantics. > > > > > > > > If you are trying to bisect to find something that broke a build, > > > > having commits that knowingly break the build will cause the bisect to > > > > fail. The bisect will falsely fail on the known to be broken commit. > > > > > > I'm not understanding, AFAIK nobody is advocating for breaking > > > bisection, just to first instroduce a function, then use it to avoid: > > > > > > 1) Introduce function foo() and use it for feature bar() > > > 2) Somebody else uses function foo() > > > 3) We find a justification to revert 1) but can't, since it will break > > > 2) so we need to add 4) that removes bar() from 1). > > > > Namhyung was asking that the c&p of code be 1 patch then "add new > > changes like using openat() on top". That is: > > > > patch 1: add is_directory_at - introduce the 2 line helper function > > patch 2: move the code > > patch 3: update the code to use is_directory_at > > > > patch 2 is known broken as patch 3 is fixing it. > > > > Hopefully this is clear. > > Actually I don't care about the patch ordering. My request is not > to break build and just to separate different changes out. :) So, patch 2 can't be separated from patch 3 - are we agreed? So we squash patch 2 with patch 3. Patch 1 is trivial and fails to meet the bar of a meaningful change, so we squash that. We end up with this patch. If there's a later revert and a dependence of the 2 liner, just don't revert that part of the change. We've never had such a revert so it is hard to see why we need to generate so much churn because of it. Thanks, Ian
On Mon, Nov 04, 2024 at 01:06:35PM -0800, Ian Rogers wrote: > On Mon, Nov 4, 2024 at 1:00 PM Namhyung Kim <namhyung@kernel.org> wrote: > > > > On Mon, Nov 04, 2024 at 12:48:01PM -0800, Ian Rogers wrote: > > > Namhyung was asking that the c&p of code be 1 patch then "add new > > > changes like using openat() on top". That is: > > > > > > patch 1: add is_directory_at - introduce the 2 line helper function > > > patch 2: move the code > > > patch 3: update the code to use is_directory_at > > > > > > patch 2 is known broken as patch 3 is fixing it. > > > > > > Hopefully this is clear. > > > > Actually I don't care about the patch ordering. My request is not > > to break build and just to separate different changes out. :) > > So, patch 2 can't be separated from patch 3 - are we agreed? So we > squash patch 2 with patch 3. Patch 1 is trivial and fails to meet the > bar of a meaningful change, so we squash that. We end up with this > patch. If there's a later revert and a dependence of the 2 liner, just > don't revert that part of the change. We've never had such a revert so > it is hard to see why we need to generate so much churn because of it. As I said the patch 1 should be the c&p and no need to introduce is_directory_at() before that. Why not doing patch1: move the code patch2: add and use is_directory_at() + openat() ? Thanks, Namhyung
On Mon, Nov 4, 2024 at 2:09 PM Namhyung Kim <namhyung@kernel.org> wrote: > > On Mon, Nov 04, 2024 at 01:06:35PM -0800, Ian Rogers wrote: > > On Mon, Nov 4, 2024 at 1:00 PM Namhyung Kim <namhyung@kernel.org> wrote: > > > > > > On Mon, Nov 04, 2024 at 12:48:01PM -0800, Ian Rogers wrote: > > > > Namhyung was asking that the c&p of code be 1 patch then "add new > > > > changes like using openat() on top". That is: > > > > > > > > patch 1: add is_directory_at - introduce the 2 line helper function > > > > patch 2: move the code > > > > patch 3: update the code to use is_directory_at > > > > > > > > patch 2 is known broken as patch 3 is fixing it. > > > > > > > > Hopefully this is clear. > > > > > > Actually I don't care about the patch ordering. My request is not > > > to break build and just to separate different changes out. :) > > > > So, patch 2 can't be separated from patch 3 - are we agreed? So we > > squash patch 2 with patch 3. Patch 1 is trivial and fails to meet the > > bar of a meaningful change, so we squash that. We end up with this > > patch. If there's a later revert and a dependence of the 2 liner, just > > don't revert that part of the change. We've never had such a revert so > > it is hard to see why we need to generate so much churn because of it. > > As I said the patch 1 should be the c&p and no need to introduce > is_directory_at() before that. Why not doing > > patch1: move the code > patch2: add and use is_directory_at() + openat() > > ? Because placing all the code in 1 file expands GCC's analysis and the build fails. In the commit message I describe this: "The arrays are warned about potential buffer overflows by GCC now that all the code exists in a single C file." A standard unsound workaround to this is to change "sizeof(...)" to "sizeof(...) - 1", as it is ugly I added is_directory_at to not suffer the problem as the arrays are gone. Thanks, Ian
On Mon, Nov 04, 2024 at 02:20:30PM -0800, Ian Rogers wrote: > On Mon, Nov 4, 2024 at 2:09 PM Namhyung Kim <namhyung@kernel.org> wrote: > > > > On Mon, Nov 04, 2024 at 01:06:35PM -0800, Ian Rogers wrote: > > > On Mon, Nov 4, 2024 at 1:00 PM Namhyung Kim <namhyung@kernel.org> wrote: > > > > > > > > On Mon, Nov 04, 2024 at 12:48:01PM -0800, Ian Rogers wrote: > > > > > Namhyung was asking that the c&p of code be 1 patch then "add new > > > > > changes like using openat() on top". That is: > > > > > > > > > > patch 1: add is_directory_at - introduce the 2 line helper function > > > > > patch 2: move the code > > > > > patch 3: update the code to use is_directory_at > > > > > > > > > > patch 2 is known broken as patch 3 is fixing it. > > > > > > > > > > Hopefully this is clear. > > > > > > > > Actually I don't care about the patch ordering. My request is not > > > > to break build and just to separate different changes out. :) > > > > > > So, patch 2 can't be separated from patch 3 - are we agreed? So we > > > squash patch 2 with patch 3. Patch 1 is trivial and fails to meet the > > > bar of a meaningful change, so we squash that. We end up with this > > > patch. If there's a later revert and a dependence of the 2 liner, just > > > don't revert that part of the change. We've never had such a revert so > > > it is hard to see why we need to generate so much churn because of it. > > > > As I said the patch 1 should be the c&p and no need to introduce > > is_directory_at() before that. Why not doing > > > > patch1: move the code > > patch2: add and use is_directory_at() + openat() > > > > ? > > Because placing all the code in 1 file expands GCC's analysis and the > build fails. In the commit message I describe this: > "The arrays are warned about potential buffer overflows by GCC now > that all the code exists in a single C file." > A standard unsound workaround to this is to change "sizeof(...)" to > "sizeof(...) - 1", as it is ugly I added is_directory_at to not suffer > the problem as the arrays are gone. Ok, it's strange that this type of analysis depends on the placement. Anyway it seems there's a problem in the code already. Then we can fix it first and then move. How about this? patch1: add and use is_directory_at() + openat() patch2: move the code Thanks, Namhyung
On Mon, Nov 4, 2024 at 3:22 PM Namhyung Kim <namhyung@kernel.org> wrote: > > On Mon, Nov 04, 2024 at 02:20:30PM -0800, Ian Rogers wrote: > > On Mon, Nov 4, 2024 at 2:09 PM Namhyung Kim <namhyung@kernel.org> wrote: > > > > > > On Mon, Nov 04, 2024 at 01:06:35PM -0800, Ian Rogers wrote: > > > > On Mon, Nov 4, 2024 at 1:00 PM Namhyung Kim <namhyung@kernel.org> wrote: > > > > > > > > > > On Mon, Nov 04, 2024 at 12:48:01PM -0800, Ian Rogers wrote: > > > > > > Namhyung was asking that the c&p of code be 1 patch then "add new > > > > > > changes like using openat() on top". That is: > > > > > > > > > > > > patch 1: add is_directory_at - introduce the 2 line helper function > > > > > > patch 2: move the code > > > > > > patch 3: update the code to use is_directory_at > > > > > > > > > > > > patch 2 is known broken as patch 3 is fixing it. > > > > > > > > > > > > Hopefully this is clear. > > > > > > > > > > Actually I don't care about the patch ordering. My request is not > > > > > to break build and just to separate different changes out. :) > > > > > > > > So, patch 2 can't be separated from patch 3 - are we agreed? So we > > > > squash patch 2 with patch 3. Patch 1 is trivial and fails to meet the > > > > bar of a meaningful change, so we squash that. We end up with this > > > > patch. If there's a later revert and a dependence of the 2 liner, just > > > > don't revert that part of the change. We've never had such a revert so > > > > it is hard to see why we need to generate so much churn because of it. > > > > > > As I said the patch 1 should be the c&p and no need to introduce > > > is_directory_at() before that. Why not doing > > > > > > patch1: move the code > > > patch2: add and use is_directory_at() + openat() > > > > > > ? > > > > Because placing all the code in 1 file expands GCC's analysis and the > > build fails. In the commit message I describe this: > > "The arrays are warned about potential buffer overflows by GCC now > > that all the code exists in a single C file." > > A standard unsound workaround to this is to change "sizeof(...)" to > > "sizeof(...) - 1", as it is ugly I added is_directory_at to not suffer > > the problem as the arrays are gone. > > Ok, it's strange that this type of analysis depends on the placement. > Anyway it seems there's a problem in the code already. Then we can fix > it first and then move. How about this? > > patch1: add and use is_directory_at() + openat() > patch2: move the code I'm happy if the maintainers do that. Thanks, Ian
On Mon, Nov 04, 2024 at 03:28:15PM -0800, Ian Rogers wrote: > On Mon, Nov 4, 2024 at 3:22 PM Namhyung Kim <namhyung@kernel.org> wrote: > > > > On Mon, Nov 04, 2024 at 02:20:30PM -0800, Ian Rogers wrote: > > > On Mon, Nov 4, 2024 at 2:09 PM Namhyung Kim <namhyung@kernel.org> wrote: > > > > > > > > On Mon, Nov 04, 2024 at 01:06:35PM -0800, Ian Rogers wrote: > > > > > On Mon, Nov 4, 2024 at 1:00 PM Namhyung Kim <namhyung@kernel.org> wrote: > > > > > > > > > > > > On Mon, Nov 04, 2024 at 12:48:01PM -0800, Ian Rogers wrote: > > > > > > > Namhyung was asking that the c&p of code be 1 patch then "add new > > > > > > > changes like using openat() on top". That is: > > > > > > > > > > > > > > patch 1: add is_directory_at - introduce the 2 line helper function > > > > > > > patch 2: move the code > > > > > > > patch 3: update the code to use is_directory_at > > > > > > > > > > > > > > patch 2 is known broken as patch 3 is fixing it. > > > > > > > > > > > > > > Hopefully this is clear. > > > > > > > > > > > > Actually I don't care about the patch ordering. My request is not > > > > > > to break build and just to separate different changes out. :) > > > > > > > > > > So, patch 2 can't be separated from patch 3 - are we agreed? So we > > > > > squash patch 2 with patch 3. Patch 1 is trivial and fails to meet the > > > > > bar of a meaningful change, so we squash that. We end up with this > > > > > patch. If there's a later revert and a dependence of the 2 liner, just > > > > > don't revert that part of the change. We've never had such a revert so > > > > > it is hard to see why we need to generate so much churn because of it. > > > > > > > > As I said the patch 1 should be the c&p and no need to introduce > > > > is_directory_at() before that. Why not doing > > > > > > > > patch1: move the code > > > > patch2: add and use is_directory_at() + openat() > > > > > > > > ? > > > > > > Because placing all the code in 1 file expands GCC's analysis and the > > > build fails. In the commit message I describe this: > > > "The arrays are warned about potential buffer overflows by GCC now > > > that all the code exists in a single C file." > > > A standard unsound workaround to this is to change "sizeof(...)" to > > > "sizeof(...) - 1", as it is ugly I added is_directory_at to not suffer > > > the problem as the arrays are gone. > > > > Ok, it's strange that this type of analysis depends on the placement. > > Anyway it seems there's a problem in the code already. Then we can fix > > it first and then move. How about this? > > > > patch1: add and use is_directory_at() + openat() > > patch2: move the code > > I'm happy if the maintainers do that. It's probably not gonna happen anytime soon and I'd be happy if you could do that. Thanks, Namhyung
© 2016 - 2024 Red Hat, Inc.