Skip to content

Commit 32d8035

Browse files
Mic92Ericson2314xokdvium
committed
Fix ParsedURL handling of %2F in URL paths
See the new extensive doxygen in `url.hh`. This fixes fetching gitlab: flakes. Paths are now stored as a std::vector of individual path segments, which can themselves contain path separators '/' (%2F). This is necessary to make the Gitlab's /projects/ API work. Co-authored-by: John Ericson <[email protected]> Co-authored-by: Sergei Zimmerman <[email protected]>
1 parent 4648cca commit 32d8035

File tree

19 files changed

+454
-135
lines changed

19 files changed

+454
-135
lines changed

src/libfetchers/git-lfs-fetch.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ static LfsApiInfo getLfsApi(const ParsedURL & url)
6969

7070
args.push_back("--");
7171
args.push_back("git-lfs-authenticate");
72-
args.push_back(url.path);
72+
// FIXME %2F encode slashes? Does this command take/accept percent encoding?
73+
args.push_back(url.renderPath(/*encode=*/false));
7374
args.push_back("download");
7475

7576
auto [status, output] = runProgram({.program = "ssh", .args = args});

src/libfetchers/git.cc

Lines changed: 26 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -56,13 +56,12 @@ Path getCachePath(std::string_view key, bool shallow)
5656
// ...
5757
std::optional<std::string> readHead(const Path & path)
5858
{
59-
auto [status, output] = runProgram(
60-
RunOptions{
61-
.program = "git",
62-
// FIXME: use 'HEAD' to avoid returning all refs
63-
.args = {"ls-remote", "--symref", path},
64-
.isInteractive = true,
65-
});
59+
auto [status, output] = runProgram(RunOptions{
60+
.program = "git",
61+
// FIXME: use 'HEAD' to avoid returning all refs
62+
.args = {"ls-remote", "--symref", path},
63+
.isInteractive = true,
64+
});
6665
if (status != 0)
6766
return std::nullopt;
6867

@@ -320,18 +319,17 @@ struct GitInputScheme : InputScheme
320319

321320
writeFile(*repoPath / path.rel(), contents);
322321

323-
auto result = runProgram(
324-
RunOptions{
325-
.program = "git",
326-
.args =
327-
{"-C",
328-
repoPath->string(),
329-
"--git-dir",
330-
repoInfo.gitDir,
331-
"check-ignore",
332-
"--quiet",
333-
std::string(path.rel())},
334-
});
322+
auto result = runProgram(RunOptions{
323+
.program = "git",
324+
.args =
325+
{"-C",
326+
repoPath->string(),
327+
"--git-dir",
328+
repoInfo.gitDir,
329+
"check-ignore",
330+
"--quiet",
331+
std::string(path.rel())},
332+
});
335333
auto exitCode =
336334
#ifndef WIN32 // TODO abstract over exit status handling on Windows
337335
WEXITSTATUS(result.first)
@@ -462,8 +460,8 @@ struct GitInputScheme : InputScheme
462460

463461
// Why are we checking for bare repository?
464462
// well if it's a bare repository we want to force a git fetch rather than copying the folder
465-
bool isBareRepository = url.scheme == "file" && pathExists(url.path) && !pathExists(url.path + "/.git");
466-
//
463+
auto isBareRepository = [](PathView path) { return pathExists(path) && !pathExists(path + "/.git"); };
464+
467465
// FIXME: here we turn a possibly relative path into an absolute path.
468466
// This allows relative git flake inputs to be resolved against the
469467
// **current working directory** (as in POSIX), which tends to work out
@@ -472,8 +470,10 @@ struct GitInputScheme : InputScheme
472470
//
473471
// See: https://discourse.nixos.org/t/57783 and #9708
474472
//
475-
if (url.scheme == "file" && !forceHttp && !isBareRepository) {
476-
if (!isAbsolute(url.path)) {
473+
if (url.scheme == "file" && !forceHttp && !isBareRepository(renderUrlPathEnsureLegal(url.path))) {
474+
auto path = renderUrlPathEnsureLegal(url.path);
475+
476+
if (!isAbsolute(path)) {
477477
warn(
478478
"Fetching Git repository '%s', which uses a path relative to the current directory. "
479479
"This is not supported and will stop working in a future release. "
@@ -483,10 +483,10 @@ struct GitInputScheme : InputScheme
483483

484484
// If we don't check here for the path existence, then we can give libgit2 any directory
485485
// and it will initialize them as git directories.
486-
if (!pathExists(url.path)) {
487-
throw Error("The path '%s' does not exist.", url.path);
486+
if (!pathExists(path)) {
487+
throw Error("The path '%s' does not exist.", path);
488488
}
489-
repoInfo.location = std::filesystem::absolute(url.path);
489+
repoInfo.location = std::filesystem::absolute(path);
490490
} else {
491491
if (url.scheme == "file")
492492
/* Query parameters are meaningless for file://, but

src/libfetchers/github.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ struct GitArchiveInputScheme : InputScheme
3838
if (url.scheme != schemeName())
3939
return {};
4040

41-
auto path = tokenizeString<std::vector<std::string>>(url.path, "/");
41+
const auto & path = url.path;
4242

4343
std::optional<Hash> rev;
4444
std::optional<std::string> ref;
@@ -139,12 +139,12 @@ struct GitArchiveInputScheme : InputScheme
139139
auto repo = getStrAttr(input.attrs, "repo");
140140
auto ref = input.getRef();
141141
auto rev = input.getRev();
142-
auto path = owner + "/" + repo;
142+
std::vector<std::string> path{owner, repo};
143143
assert(!(ref && rev));
144144
if (ref)
145-
path += "/" + *ref;
145+
path.push_back(*ref);
146146
if (rev)
147-
path += "/" + rev->to_string(HashFormat::Base16, false);
147+
path.push_back(rev->to_string(HashFormat::Base16, false));
148148
auto url = ParsedURL{
149149
.scheme = std::string{schemeName()},
150150
.path = path,

src/libfetchers/indirect.cc

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ struct IndirectInputScheme : InputScheme
1414
if (url.scheme != "flake")
1515
return {};
1616

17-
auto path = tokenizeString<std::vector<std::string>>(url.path, "/");
17+
const auto & path = url.path;
1818

1919
std::optional<Hash> rev;
2020
std::optional<std::string> ref;
@@ -82,16 +82,15 @@ struct IndirectInputScheme : InputScheme
8282

8383
ParsedURL toURL(const Input & input) const override
8484
{
85-
ParsedURL url;
86-
url.scheme = "flake";
87-
url.path = getStrAttr(input.attrs, "id");
85+
ParsedURL url{
86+
.scheme = "flake",
87+
.path = {getStrAttr(input.attrs, "id")},
88+
};
8889
if (auto ref = input.getRef()) {
89-
url.path += '/';
90-
url.path += *ref;
90+
url.path.push_back(*ref);
9191
};
9292
if (auto rev = input.getRev()) {
93-
url.path += '/';
94-
url.path += rev->gitRev();
93+
url.path.push_back(rev->gitRev());
9594
};
9695
return url;
9796
}

src/libfetchers/mercurial.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ struct MercurialInputScheme : InputScheme
120120
{
121121
auto url = parseURL(getStrAttr(input.attrs, "url"));
122122
if (url.scheme == "file" && !input.getRef() && !input.getRev())
123-
return url.path;
123+
return renderUrlPathEnsureLegal(url.path);
124124
return {};
125125
}
126126

@@ -152,7 +152,7 @@ struct MercurialInputScheme : InputScheme
152152
{
153153
auto url = parseURL(getStrAttr(input.attrs, "url"));
154154
bool isLocal = url.scheme == "file";
155-
return {isLocal, isLocal ? url.path : url.to_string()};
155+
return {isLocal, isLocal ? renderUrlPathEnsureLegal(url.path) : url.to_string()};
156156
}
157157

158158
StorePath fetchToStore(ref<Store> store, Input & input) const

src/libfetchers/path.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ struct PathInputScheme : InputScheme
2020

2121
Input input{settings};
2222
input.attrs.insert_or_assign("type", "path");
23-
input.attrs.insert_or_assign("path", url.path);
23+
input.attrs.insert_or_assign("path", renderUrlPathEnsureLegal(url.path));
2424

2525
for (auto & [name, value] : url.query)
2626
if (name == "rev" || name == "narHash")
@@ -74,7 +74,7 @@ struct PathInputScheme : InputScheme
7474
query.erase("__final");
7575
return ParsedURL{
7676
.scheme = "path",
77-
.path = getStrAttr(input.attrs, "path"),
77+
.path = splitString<std::vector<std::string>>(getStrAttr(input.attrs, "path"), "/"),
7878
.query = query,
7979
};
8080
}

src/libfetchers/tarball.cc

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -107,19 +107,19 @@ DownloadFileResult downloadFile(
107107
}
108108

109109
static DownloadTarballResult downloadTarball_(
110-
const Settings & settings, const std::string & url, const Headers & headers, const std::string & displayPrefix)
110+
const Settings & settings, const std::string & urlS, const Headers & headers, const std::string & displayPrefix)
111111
{
112+
auto url = parseURL(urlS);
112113

113114
// Some friendly error messages for common mistakes.
114115
// Namely lets catch when the url is a local file path, but
115116
// it is not in fact a tarball.
116-
if (url.rfind("file://", 0) == 0) {
117-
// Remove "file://" prefix to get the local file path
118-
std::string localPath = url.substr(7);
119-
if (!std::filesystem::exists(localPath)) {
117+
if (url.scheme == "file") {
118+
std::filesystem::path localPath = renderUrlPathEnsureLegal(url.path);
119+
if (!exists(localPath)) {
120120
throw Error("tarball '%s' does not exist.", localPath);
121121
}
122-
if (std::filesystem::is_directory(localPath)) {
122+
if (is_directory(localPath)) {
123123
if (std::filesystem::exists(localPath + "/.git")) {
124124
throw Error(
125125
"tarball '%s' is a git repository, not a tarball. Please use `git+file` as the scheme.", localPath);
@@ -128,7 +128,7 @@ static DownloadTarballResult downloadTarball_(
128128
}
129129
}
130130

131-
Cache::Key cacheKey{"tarball", {{"url", url}}};
131+
Cache::Key cacheKey{"tarball", {{"url", urlS}}};
132132

133133
auto cached = settings.getCache()->lookupExpired(cacheKey);
134134

@@ -153,7 +153,7 @@ static DownloadTarballResult downloadTarball_(
153153
auto _res = std::make_shared<Sync<FileTransferResult>>();
154154

155155
auto source = sinkToSource([&](Sink & sink) {
156-
FileTransferRequest req(parseURL(url));
156+
FileTransferRequest req(url);
157157
req.expectedETag = cached ? getStrAttr(cached->value, "etag") : "";
158158
getFileTransfer()->download(std::move(req), sink, [_res](FileTransferResult r) { *_res->lock() = r; });
159159
});
@@ -166,7 +166,7 @@ static DownloadTarballResult downloadTarball_(
166166

167167
/* Note: if the download is cached, `importTarball()` will receive
168168
no data, which causes it to import an empty tarball. */
169-
auto archive = hasSuffix(toLower(parseURL(url).path), ".zip") ? ({
169+
auto archive = !url.path.empty() && hasSuffix(toLower(url.path.back()), ".zip") ? ({
170170
/* In streaming mode, libarchive doesn't handle
171171
symlinks in zip files correctly (#10649). So write
172172
the entire file to disk so libarchive can access it
@@ -180,7 +180,7 @@ static DownloadTarballResult downloadTarball_(
180180
}
181181
TarArchive{path};
182182
})
183-
: TarArchive{*source};
183+
: TarArchive{*source};
184184
auto tarballCache = getTarballCache();
185185
auto parseSink = tarballCache->getFileSystemObjectSink();
186186
auto lastModified = unpackTarfileToSink(archive, *parseSink);
@@ -234,8 +234,11 @@ struct CurlInputScheme : InputScheme
234234
{
235235
const StringSet transportUrlSchemes = {"file", "http", "https"};
236236

237-
bool hasTarballExtension(std::string_view path) const
237+
bool hasTarballExtension(const ParsedURL & url) const
238238
{
239+
if (url.path.empty())
240+
return false;
241+
const auto & path = url.path.back();
239242
return hasSuffix(path, ".zip") || hasSuffix(path, ".tar") || hasSuffix(path, ".tgz")
240243
|| hasSuffix(path, ".tar.gz") || hasSuffix(path, ".tar.xz") || hasSuffix(path, ".tar.bz2")
241244
|| hasSuffix(path, ".tar.zst");
@@ -336,7 +339,7 @@ struct FileInputScheme : CurlInputScheme
336339
auto parsedUrlScheme = parseUrlScheme(url.scheme);
337340
return transportUrlSchemes.count(std::string(parsedUrlScheme.transport))
338341
&& (parsedUrlScheme.application ? parsedUrlScheme.application.value() == schemeName()
339-
: (!requireTree && !hasTarballExtension(url.path)));
342+
: (!requireTree && !hasTarballExtension(url)));
340343
}
341344

342345
std::pair<ref<SourceAccessor>, Input> getAccessor(ref<Store> store, const Input & _input) const override
@@ -373,7 +376,7 @@ struct TarballInputScheme : CurlInputScheme
373376

374377
return transportUrlSchemes.count(std::string(parsedUrlScheme.transport))
375378
&& (parsedUrlScheme.application ? parsedUrlScheme.application.value() == schemeName()
376-
: (requireTree || hasTarballExtension(url.path)));
379+
: (requireTree || hasTarballExtension(url)));
377380
}
378381

379382
std::pair<ref<SourceAccessor>, Input> getAccessor(ref<Store> store, const Input & _input) const override

src/libflake/flakeref.cc

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ std::pair<FlakeRef, std::string> parsePathFlakeRefWithFragment(
143143
auto parsedURL = ParsedURL{
144144
.scheme = "git+file",
145145
.authority = ParsedURL::Authority{},
146-
.path = flakeRoot,
146+
.path = splitString<std::vector<std::string>>(flakeRoot, "/"),
147147
.query = query,
148148
.fragment = fragment,
149149
};
@@ -172,7 +172,13 @@ std::pair<FlakeRef, std::string> parsePathFlakeRefWithFragment(
172172

173173
return fromParsedURL(
174174
fetchSettings,
175-
{.scheme = "path", .authority = ParsedURL::Authority{}, .path = path, .query = query, .fragment = fragment},
175+
{
176+
.scheme = "path",
177+
.authority = ParsedURL::Authority{},
178+
.path = splitString<std::vector<std::string>>(path, "/"),
179+
.query = query,
180+
.fragment = fragment,
181+
},
176182
isFlake);
177183
}
178184

@@ -193,7 +199,7 @@ parseFlakeIdRef(const fetchers::Settings & fetchSettings, const std::string & ur
193199
auto parsedURL = ParsedURL{
194200
.scheme = "flake",
195201
.authority = ParsedURL::Authority{},
196-
.path = match[1],
202+
.path = splitString<std::vector<std::string>>(match[1].str(), "/"),
197203
};
198204

199205
return std::make_pair(
@@ -211,8 +217,12 @@ std::optional<std::pair<FlakeRef, std::string>> parseURLFlakeRef(
211217
{
212218
try {
213219
auto parsed = parseURL(url, /*lenient=*/true);
214-
if (baseDir && (parsed.scheme == "path" || parsed.scheme == "git+file") && !isAbsolute(parsed.path))
215-
parsed.path = absPath(parsed.path, *baseDir);
220+
if (baseDir && (parsed.scheme == "path" || parsed.scheme == "git+file")) {
221+
/* Here we know that the path must not contain encoded '/' or NUL bytes. */
222+
auto path = renderUrlPathEnsureLegal(parsed.path);
223+
if (!isAbsolute(path))
224+
parsed.path = splitString<std::vector<std::string>>(absPath(path, *baseDir), "/");
225+
}
216226
return fromParsedURL(fetchSettings, std::move(parsed), isFlake);
217227
} catch (BadURL &) {
218228
return std::nullopt;

src/libflake/url-name.cc

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,21 @@ std::optional<std::string> getNameFromURL(const ParsedURL & url)
2727
return match.str(2);
2828
}
2929

30+
/* This is not right, because special chars like slashes within the
31+
path fragments should be percent encoded, but I don't think any
32+
of the regexes above care. */
33+
auto path = concatStringsSep("/", url.path);
34+
3035
/* If this is a github/gitlab/sourcehut flake, use the repo name */
31-
if (std::regex_match(url.scheme, gitProviderRegex) && std::regex_match(url.path, match, secondPathSegmentRegex))
36+
if (std::regex_match(url.scheme, gitProviderRegex) && std::regex_match(path, match, secondPathSegmentRegex))
3237
return match.str(1);
3338

3439
/* If it is a regular git flake, use the directory name */
35-
if (std::regex_match(url.scheme, gitSchemeRegex) && std::regex_match(url.path, match, lastPathSegmentRegex))
40+
if (std::regex_match(url.scheme, gitSchemeRegex) && std::regex_match(path, match, lastPathSegmentRegex))
3641
return match.str(1);
3742

3843
/* If there is no fragment, take the last element of the path */
39-
if (std::regex_match(url.path, match, lastPathSegmentRegex))
44+
if (std::regex_match(path, match, lastPathSegmentRegex))
4045
return match.str(1);
4146

4247
/* If even that didn't work, the URL does not contain enough info to determine a useful name */

0 commit comments

Comments
 (0)