Skip to content

Commit c310b33

Browse files
Mic92Ericson2314
andcommitted
Fix ParsedURL handling of %2F in URL paths
See the new extensive doxygen in `url.hh` Co-authored-by: John Ericson <[email protected]>
1 parent 8ee7479 commit c310b33

File tree

20 files changed

+453
-117
lines changed

20 files changed

+453
-117
lines changed

src/libfetchers/git-lfs-fetch.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,8 @@ static LfsApiInfo getLfsApi(const ParsedURL & url)
6969

7070
args.push_back("--");
7171
args.push_back("git-lfs-authenticate");
72-
args.push_back(url.path);
72+
// FIXME %2F encode slashes? Does this command take/accept percent encoding?
73+
args.push_back(urlPathToCanonPath(url.path).abs());
7374
args.push_back("download");
7475

7576
auto [status, output] = runProgram({.program = "ssh", .args = args});

src/libfetchers/git.cc

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -460,9 +460,11 @@ struct GitInputScheme : InputScheme
460460
static bool forceHttp = getEnv("_NIX_FORCE_HTTP") == "1"; // for testing
461461
auto url = parseURL(getStrAttr(input.attrs, "url"));
462462

463+
auto path = urlPathToCanonPath(url.path).abs();
464+
463465
// Why are we checking for bare repository?
464466
// well if it's a bare repository we want to force a git fetch rather than copying the folder
465-
bool isBareRepository = url.scheme == "file" && pathExists(url.path) && !pathExists(url.path + "/.git");
467+
bool isBareRepository = url.scheme == "file" && pathExists(path) && !pathExists(path + "/.git");
466468
//
467469
// FIXME: here we turn a possibly relative path into an absolute path.
468470
// This allows relative git flake inputs to be resolved against the
@@ -473,7 +475,7 @@ struct GitInputScheme : InputScheme
473475
// See: https://discourse.nixos.org/t/57783 and #9708
474476
//
475477
if (url.scheme == "file" && !forceHttp && !isBareRepository) {
476-
if (!isAbsolute(url.path)) {
478+
if (!isAbsolute(path)) {
477479
warn(
478480
"Fetching Git repository '%s', which uses a path relative to the current directory. "
479481
"This is not supported and will stop working in a future release. "
@@ -483,10 +485,10 @@ struct GitInputScheme : InputScheme
483485

484486
// If we don't check here for the path existence, then we can give libgit2 any directory
485487
// and it will initialize them as git directories.
486-
if (!pathExists(url.path)) {
487-
throw Error("The path '%s' does not exist.", url.path);
488+
if (!pathExists(path)) {
489+
throw Error("The path '%s' does not exist.", path);
488490
}
489-
repoInfo.location = std::filesystem::absolute(url.path);
491+
repoInfo.location = std::filesystem::absolute(path);
490492
} else {
491493
if (url.scheme == "file")
492494
/* Query parameters are meaningless for file://, but

src/libfetchers/github.cc

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ struct GitArchiveInputScheme : InputScheme
3838
if (url.scheme != schemeName())
3939
return {};
4040

41-
auto path = tokenizeString<std::vector<std::string>>(url.path, "/");
41+
const auto & path = url.path;
4242

4343
std::optional<Hash> rev;
4444
std::optional<std::string> ref;
@@ -139,12 +139,12 @@ struct GitArchiveInputScheme : InputScheme
139139
auto repo = getStrAttr(input.attrs, "repo");
140140
auto ref = input.getRef();
141141
auto rev = input.getRev();
142-
auto path = owner + "/" + repo;
142+
std::vector<std::string> path{owner, repo};
143143
assert(!(ref && rev));
144144
if (ref)
145-
path += "/" + *ref;
145+
path.push_back(*ref);
146146
if (rev)
147-
path += "/" + rev->to_string(HashFormat::Base16, false);
147+
path.push_back(rev->to_string(HashFormat::Base16, false));
148148
auto url = ParsedURL{
149149
.scheme = std::string{schemeName()},
150150
.path = path,

src/libfetchers/indirect.cc

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ struct IndirectInputScheme : InputScheme
1414
if (url.scheme != "flake")
1515
return {};
1616

17-
auto path = tokenizeString<std::vector<std::string>>(url.path, "/");
17+
const auto & path = url.path;
1818

1919
std::optional<Hash> rev;
2020
std::optional<std::string> ref;
@@ -82,16 +82,15 @@ struct IndirectInputScheme : InputScheme
8282

8383
ParsedURL toURL(const Input & input) const override
8484
{
85-
ParsedURL url;
86-
url.scheme = "flake";
87-
url.path = getStrAttr(input.attrs, "id");
85+
ParsedURL url{
86+
.scheme = "flake",
87+
.path = {getStrAttr(input.attrs, "id")},
88+
};
8889
if (auto ref = input.getRef()) {
89-
url.path += '/';
90-
url.path += *ref;
90+
url.path.push_back(*ref);
9191
};
9292
if (auto rev = input.getRev()) {
93-
url.path += '/';
94-
url.path += rev->gitRev();
93+
url.path.push_back(rev->gitRev());
9594
};
9695
return url;
9796
}

src/libfetchers/mercurial.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ struct MercurialInputScheme : InputScheme
120120
{
121121
auto url = parseURL(getStrAttr(input.attrs, "url"));
122122
if (url.scheme == "file" && !input.getRef() && !input.getRev())
123-
return url.path;
123+
return urlPathToCanonPath(url.path).abs();
124124
return {};
125125
}
126126

@@ -152,7 +152,7 @@ struct MercurialInputScheme : InputScheme
152152
{
153153
auto url = parseURL(getStrAttr(input.attrs, "url"));
154154
bool isLocal = url.scheme == "file";
155-
return {isLocal, isLocal ? url.path : url.to_string()};
155+
return {isLocal, isLocal ? urlPathToCanonPath(url.path).abs() : url.to_string()};
156156
}
157157

158158
StorePath fetchToStore(ref<Store> store, Input & input) const

src/libfetchers/path.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ struct PathInputScheme : InputScheme
2020

2121
Input input{settings};
2222
input.attrs.insert_or_assign("type", "path");
23-
input.attrs.insert_or_assign("path", url.path);
23+
input.attrs.insert_or_assign("path", urlPathToCanonPath(url.path).abs());
2424

2525
for (auto & [name, value] : url.query)
2626
if (name == "rev" || name == "narHash")
@@ -74,7 +74,7 @@ struct PathInputScheme : InputScheme
7474
query.erase("__final");
7575
return ParsedURL{
7676
.scheme = "path",
77-
.path = getStrAttr(input.attrs, "path"),
77+
.path = splitString<std::vector<std::string>>("/", getStrAttr(input.attrs, "path")),
7878
.query = query,
7979
};
8080
}

src/libfetchers/tarball.cc

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -107,15 +107,16 @@ DownloadFileResult downloadFile(
107107
}
108108

109109
static DownloadTarballResult downloadTarball_(
110-
const Settings & settings, const std::string & url, const Headers & headers, const std::string & displayPrefix)
110+
const Settings & settings, const std::string & urlS, const Headers & headers, const std::string & displayPrefix)
111111
{
112+
auto url = parseURL(urlS);
112113

113114
// Some friendly error messages for common mistakes.
114115
// Namely lets catch when the url is a local file path, but
115116
// it is not in fact a tarball.
116-
if (url.rfind("file://", 0) == 0) {
117+
if (url.scheme == "file") {
117118
// Remove "file://" prefix to get the local file path
118-
std::string localPath = url.substr(7);
119+
std::string localPath = urlPathToCanonPath(url.path).abs();
119120
if (!std::filesystem::exists(localPath)) {
120121
throw Error("tarball '%s' does not exist.", localPath);
121122
}
@@ -128,7 +129,7 @@ static DownloadTarballResult downloadTarball_(
128129
}
129130
}
130131

131-
Cache::Key cacheKey{"tarball", {{"url", url}}};
132+
Cache::Key cacheKey{"tarball", {{"url", urlS}}};
132133

133134
auto cached = settings.getCache()->lookupExpired(cacheKey);
134135

@@ -153,7 +154,7 @@ static DownloadTarballResult downloadTarball_(
153154
auto _res = std::make_shared<Sync<FileTransferResult>>();
154155

155156
auto source = sinkToSource([&](Sink & sink) {
156-
FileTransferRequest req(parseURL(url));
157+
FileTransferRequest req(url);
157158
req.expectedETag = cached ? getStrAttr(cached->value, "etag") : "";
158159
getFileTransfer()->download(std::move(req), sink, [_res](FileTransferResult r) { *_res->lock() = r; });
159160
});
@@ -166,7 +167,7 @@ static DownloadTarballResult downloadTarball_(
166167

167168
/* Note: if the download is cached, `importTarball()` will receive
168169
no data, which causes it to import an empty tarball. */
169-
auto archive = hasSuffix(toLower(parseURL(url).path), ".zip") ? ({
170+
auto archive = !url.path.empty() && hasSuffix(toLower(url.path.back()), ".zip") ? ({
170171
/* In streaming mode, libarchive doesn't handle
171172
symlinks in zip files correctly (#10649). So write
172173
the entire file to disk so libarchive can access it
@@ -180,7 +181,7 @@ static DownloadTarballResult downloadTarball_(
180181
}
181182
TarArchive{path};
182183
})
183-
: TarArchive{*source};
184+
: TarArchive{*source};
184185
auto tarballCache = getTarballCache();
185186
auto parseSink = tarballCache->getFileSystemObjectSink();
186187
auto lastModified = unpackTarfileToSink(archive, *parseSink);
@@ -234,8 +235,11 @@ struct CurlInputScheme : InputScheme
234235
{
235236
const StringSet transportUrlSchemes = {"file", "http", "https"};
236237

237-
bool hasTarballExtension(std::string_view path) const
238+
bool hasTarballExtension(const ParsedURL & url) const
238239
{
240+
if (url.path.empty())
241+
return false;
242+
const auto & path = url.path.back();
239243
return hasSuffix(path, ".zip") || hasSuffix(path, ".tar") || hasSuffix(path, ".tgz")
240244
|| hasSuffix(path, ".tar.gz") || hasSuffix(path, ".tar.xz") || hasSuffix(path, ".tar.bz2")
241245
|| hasSuffix(path, ".tar.zst");
@@ -336,7 +340,7 @@ struct FileInputScheme : CurlInputScheme
336340
auto parsedUrlScheme = parseUrlScheme(url.scheme);
337341
return transportUrlSchemes.count(std::string(parsedUrlScheme.transport))
338342
&& (parsedUrlScheme.application ? parsedUrlScheme.application.value() == schemeName()
339-
: (!requireTree && !hasTarballExtension(url.path)));
343+
: (!requireTree && !hasTarballExtension(url)));
340344
}
341345

342346
std::pair<ref<SourceAccessor>, Input> getAccessor(ref<Store> store, const Input & _input) const override
@@ -373,7 +377,7 @@ struct TarballInputScheme : CurlInputScheme
373377

374378
return transportUrlSchemes.count(std::string(parsedUrlScheme.transport))
375379
&& (parsedUrlScheme.application ? parsedUrlScheme.application.value() == schemeName()
376-
: (requireTree || hasTarballExtension(url.path)));
380+
: (requireTree || hasTarballExtension(url)));
377381
}
378382

379383
std::pair<ref<SourceAccessor>, Input> getAccessor(ref<Store> store, const Input & _input) const override

src/libflake/flakeref.cc

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ std::pair<FlakeRef, std::string> parsePathFlakeRefWithFragment(
143143
auto parsedURL = ParsedURL{
144144
.scheme = "git+file",
145145
.authority = ParsedURL::Authority{},
146-
.path = flakeRoot,
146+
.path = splitString<std::vector<std::string>>(flakeRoot.substr(1), "/"),
147147
.query = query,
148148
.fragment = fragment,
149149
};
@@ -172,7 +172,13 @@ std::pair<FlakeRef, std::string> parsePathFlakeRefWithFragment(
172172

173173
return fromParsedURL(
174174
fetchSettings,
175-
{.scheme = "path", .authority = ParsedURL::Authority{}, .path = path, .query = query, .fragment = fragment},
175+
{
176+
.scheme = "path",
177+
.authority = ParsedURL::Authority{},
178+
.path = splitString<std::vector<std::string>>(path.substr(1), "/"),
179+
.query = query,
180+
.fragment = fragment,
181+
},
176182
isFlake);
177183
}
178184

@@ -193,7 +199,8 @@ parseFlakeIdRef(const fetchers::Settings & fetchSettings, const std::string & ur
193199
auto parsedURL = ParsedURL{
194200
.scheme = "flake",
195201
.authority = ParsedURL::Authority{},
196-
.path = match[1],
202+
// A Flake ID never contains a slash
203+
.path = {match[1]},
197204
};
198205

199206
return std::make_pair(
@@ -211,8 +218,10 @@ std::optional<std::pair<FlakeRef, std::string>> parseURLFlakeRef(
211218
{
212219
try {
213220
auto parsed = parseURL(url, /*lenient=*/true);
214-
if (baseDir && (parsed.scheme == "path" || parsed.scheme == "git+file") && !isAbsolute(parsed.path))
215-
parsed.path = absPath(parsed.path, *baseDir);
221+
// FIXME
222+
auto path = urlPathToCanonPath(parsed.path).abs();
223+
if (baseDir && (parsed.scheme == "path" || parsed.scheme == "git+file") && !isAbsolute(path))
224+
parsed.path = splitString<std::vector<std::string>>(absPath(path, *baseDir), "/");
216225
return fromParsedURL(fetchSettings, std::move(parsed), isFlake);
217226
} catch (BadURL &) {
218227
return std::nullopt;

src/libflake/url-name.cc

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,21 @@ std::optional<std::string> getNameFromURL(const ParsedURL & url)
2727
return match.str(2);
2828
}
2929

30+
/* This is not right, because special chars like slashes within the
31+
path fragments should be percent encoded, but I don't think any
32+
of the regexes above care. */
33+
auto path = concatStringsSep("/", url.path);
34+
3035
/* If this is a github/gitlab/sourcehut flake, use the repo name */
31-
if (std::regex_match(url.scheme, gitProviderRegex) && std::regex_match(url.path, match, secondPathSegmentRegex))
36+
if (std::regex_match(url.scheme, gitProviderRegex) && std::regex_match(path, match, secondPathSegmentRegex))
3237
return match.str(1);
3338

3439
/* If it is a regular git flake, use the directory name */
35-
if (std::regex_match(url.scheme, gitSchemeRegex) && std::regex_match(url.path, match, lastPathSegmentRegex))
40+
if (std::regex_match(url.scheme, gitSchemeRegex) && std::regex_match(path, match, lastPathSegmentRegex))
3641
return match.str(1);
3742

3843
/* If there is no fragment, take the last element of the path */
39-
if (std::regex_match(url.path, match, lastPathSegmentRegex))
44+
if (std::regex_match(path, match, lastPathSegmentRegex))
4045
return match.str(1);
4146

4247
/* If even that didn't work, the URL does not contain enough info to determine a useful name */

0 commit comments

Comments
 (0)