-
Notifications
You must be signed in to change notification settings - Fork 308
/
Pip.kt
788 lines (646 loc) · 33.7 KB
/
Pip.kt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
/*
* Copyright (C) 2017-2021 HERE Europe B.V.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
* License-Filename: LICENSE
*/
package org.ossreviewtoolkit.analyzer.managers
import com.fasterxml.jackson.databind.JsonNode
import com.fasterxml.jackson.databind.node.ArrayNode
import com.vdurmont.semver4j.Requirement
import java.io.File
import java.util.SortedSet
import org.apache.logging.log4j.kotlin.Logging
import org.ossreviewtoolkit.analyzer.AbstractPackageManagerFactory
import org.ossreviewtoolkit.analyzer.PackageManager
import org.ossreviewtoolkit.downloader.VersionControlSystem
import org.ossreviewtoolkit.model.Hash
import org.ossreviewtoolkit.model.Identifier
import org.ossreviewtoolkit.model.Package
import org.ossreviewtoolkit.model.PackageReference
import org.ossreviewtoolkit.model.Project
import org.ossreviewtoolkit.model.ProjectAnalyzerResult
import org.ossreviewtoolkit.model.RemoteArtifact
import org.ossreviewtoolkit.model.Scope
import org.ossreviewtoolkit.model.VcsInfo
import org.ossreviewtoolkit.model.config.AnalyzerConfiguration
import org.ossreviewtoolkit.model.config.RepositoryConfiguration
import org.ossreviewtoolkit.model.jsonMapper
import org.ossreviewtoolkit.utils.common.CommandLineTool
import org.ossreviewtoolkit.utils.common.Os
import org.ossreviewtoolkit.utils.common.ProcessCapture
import org.ossreviewtoolkit.utils.common.collectMessages
import org.ossreviewtoolkit.utils.common.normalizeLineBreaks
import org.ossreviewtoolkit.utils.common.safeDeleteRecursively
import org.ossreviewtoolkit.utils.common.textValueOrEmpty
import org.ossreviewtoolkit.utils.ort.DeclaredLicenseProcessor
import org.ossreviewtoolkit.utils.ort.OkHttpClientHelper
import org.ossreviewtoolkit.utils.ort.createOrtTempDir
import org.ossreviewtoolkit.utils.ort.createOrtTempFile
import org.ossreviewtoolkit.utils.ort.showStackTrace
import org.ossreviewtoolkit.utils.spdx.SpdxLicenseIdExpression
// The version of pip that gets installed into each virtualenv before the dependencies of a project are installed.
// Use the most recent version that still supports Python 2. PIP 21.0.0 dropped Python 2 support, see
// https://pip.pypa.io/en/stable/news/#id176.
private const val PIP_VERSION = "20.3.4"
/**
 * Packages that show up as dependencies although they are not real ones, mapped to the version that is to be ignored.
 * An empty version string means that the package is ignored regardless of its version.
 */
private val PHONY_DEPENDENCIES = mapOf(
    "pkg-resources" to "0.0.0", // Added by a bug with some Ubuntu distributions.
)

/**
 * Return whether the package with the given [name] and [version] is one of the [PHONY_DEPENDENCIES].
 */
private fun isPhonyDependency(name: String, version: String): Boolean {
    // Packages that are not listed at all are never phony.
    val ignoredVersion = PHONY_DEPENDENCIES[name] ?: return false

    return ignoredVersion.isEmpty() || version == ignoredVersion
}
/**
 * The command line tool definition for "virtualenv".
 */
object VirtualEnv : CommandLineTool {
    override fun command(workingDir: File?) = "virtualenv"

    override fun transformVersion(output: String): String {
        // Depending on the virtualenv version, the output is either the bare version or a longer line, e.g.:
        // 16.6.1
        // virtualenv 20.0.14 from /usr/local/lib/python2.7/dist-packages/virtualenv/__init__.pyc
        val withoutToolName = output.removePrefix("virtualenv ")

        return withoutToolName.substringBefore(' ')
    }

    // Ensure a minimum version known to work. Note that virtualenv bundles a version of pip, and as of pip 20.3 a new
    // dependency resolver is used, see http://pyfound.blogspot.com/2020/11/pip-20-3-new-resolver.html.
    override fun getVersionRequirement(): Requirement = Requirement.buildIvy("[15.1,)")
}
/**
 * The command line tool definition for the Python launcher / interpreter, plus helpers to detect the Python major
 * version a project requires and to locate a matching interpreter.
 */
object PythonVersion : CommandLineTool, Logging {
    // To use a specific version of Python on Windows we can use the "py" command with argument "-2" or "-3", see
    // https://docs.python.org/3/installing/#work-with-multiple-versions-of-python-installed-in-parallel.
    override fun command(workingDir: File?) = if (Os.isWindows) "py" else "python3"

    override fun transformVersion(output: String) = output.removePrefix("Python ")

    /**
     * Check all Python files in [workingDir] and return which major version of Python they are compatible with. If
     * all files are compatible with Python 3, 3 is returned. If at least one file is incompatible with Python 3, 2 is
     * returned.
     *
     * The check is done by a helper script that is copied from the class path resources to a temporary file, which is
     * deleted again afterwards. A [NumberFormatException] is thrown if the script does not print an integer.
     */
    fun getPythonMajorVersion(workingDir: File): Int {
        val scriptFile = createOrtTempFile("python_compatibility", ".py")

        try {
            // Write the script inside the try-block so that the temporary file is also removed if writing fails.
            scriptFile.writeBytes(javaClass.getResource("/scripts/python_compatibility.py").readBytes())

            // The helper script itself always has to be run with Python 3.
            val scriptCmd = if (Os.isWindows) {
                run("-3", scriptFile.path, "-d", workingDir.path)
            } else {
                run(scriptFile.path, "-d", workingDir.path)
            }

            // Trim the output as "toInt()" does not accept surrounding whitespace like a trailing line break.
            return scriptCmd.stdout.trim().toInt()
        } finally {
            if (!scriptFile.delete()) {
                logger.warn { "Helper script file '$scriptFile' could not be deleted." }
            }
        }
    }

    /**
     * Return the absolute path to the Python interpreter for the given [version]. This is helpful as esp. on Windows
     * different Python versions can be installed in arbitrary locations, and the Python executable is even usually
     * called the same in those locations. Return `null` if no matching Python interpreter is available.
     */
    fun getPythonInterpreter(version: Int): String? =
        if (Os.isWindows) {
            val installedVersions = run("--list-paths").stdout
            val versionAndPath = installedVersions.lines().find { line ->
                line.startsWith(" -$version")
            }

            // Parse a line like " -2.7-32        C:\Python27\python.exe".
            versionAndPath?.split(' ', limit = 3)?.last()?.trimStart()
        } else {
            Os.getPathFromEnvironment("python$version")?.path
        }
}
/**
 * The command line tool definition for "python-inspector".
 */
object PythonInspector : CommandLineTool {
    override fun command(workingDir: File?) = "python-inspector"

    override fun transformVersion(output: String) = output.removePrefix("Python-inspector version: ")

    override fun getVersionRequirement(): Requirement = Requirement.buildIvy("[0.6.5,)")

    /**
     * Run python-inspector in [workingDir] for the given [definitionFile] and [pythonVersion], passing [outputFile]
     * as the value of the "--json-pdt" option. A definition file named "setup.py" is passed via "--setup-py", any
     * other file via "--requirement".
     */
    fun run(
        workingDir: File,
        outputFile: String,
        definitionFile: File,
        pythonVersion: String = "38",
    ): ProcessCapture {
        val definitionFileOption = if (definitionFile.name == "setup.py") "--setup-py" else "--requirement"

        val commandLineOptions = arrayOf(
            "--python-version", pythonVersion,
            "--json-pdt", outputFile,
            definitionFileOption, definitionFile.absolutePath
        )

        return run(workingDir, *commandLineOptions)
    }
}
/**
* The [PIP](https://pip.pypa.io/) package manager for Python. Also see
* [install_requires vs requirements files](https://packaging.python.org/discussions/install-requires-vs-requirements/)
* and [setup.py vs. requirements.txt](https://caremad.io/posts/2013/07/setup-vs-requirement/).
*/
@Suppress("TooManyFunctions")
class Pip(
name: String,
analysisRoot: File,
analyzerConfig: AnalyzerConfiguration,
repoConfig: RepositoryConfiguration
) : PackageManager(name, analysisRoot, analyzerConfig, repoConfig), CommandLineTool {
companion object : Logging {
    // The license name that is looked up in the unmapped declared licenses of PyPI packages.
    private const val GENERIC_BSD_LICENSE = "BSD License"

    // The maximum length of a license field value to still be regarded as a "short string".
    private const val SHORT_STRING_MAX_CHARS = 200

    // Options that are passed to "pip install" when installing the dependencies of a project.
    private val INSTALL_OPTIONS = arrayOf("--no-warn-conflicts", "--prefer-binary")

    // TODO: Need to replace this hard-coded list of domains with e.g. a command line option.
    private val TRUSTED_HOSTS = arrayOf(
        "pypi.org",
        "pypi.python.org" // Legacy
    ).flatMap { host -> listOf("--trusted-host", host) }.toTypedArray()
}
/**
 * The factory that creates [Pip] instances for the "PIP" package manager.
 */
class Factory : AbstractPackageManagerFactory<Pip>("PIP") {
    override val globsForDefinitionFiles = listOf("*requirements*.txt", "setup.py")

    override fun create(
        analysisRoot: File,
        analyzerConfig: AnalyzerConfiguration,
        repoConfig: RepositoryConfiguration
    ): Pip {
        return Pip(managerName, analysisRoot, analyzerConfig, repoConfig)
    }
}
/** Return the name of the pip executable. The [workingDir] is not taken into account. */
override fun command(workingDir: File?) = "pip"
/**
 * Extract the version from [output] by dropping a leading "pip " prefix, if any, and keeping only the text up to the
 * first space.
 */
override fun transformVersion(output: String): String {
    val withoutToolName = output.removePrefix("pip ")

    return withoutToolName.substringBefore(' ')
}
/**
 * Run the pip executable of the virtualenv in [virtualEnvDir] from [workingDir]. The trusted host options are always
 * passed first, followed by the given [commandArgs].
 */
private fun runPipInVirtualEnv(
    virtualEnvDir: File,
    workingDir: File,
    vararg commandArgs: String
): ProcessCapture {
    val pipCommand = command(workingDir)

    return runInVirtualEnv(virtualEnvDir, workingDir, pipCommand, *TRUSTED_HOSTS, *commandArgs)
}
/**
 * Run the executable called [commandName] from the scripts directory of the virtualenv in [virtualEnvDir] with the
 * given [commandArgs], using [workingDir] as the working directory. The standard output of the process is logged on
 * debug level and the captured process is returned, independently of whether it succeeded.
 */
private fun runInVirtualEnv(
    virtualEnvDir: File,
    workingDir: File,
    commandName: String,
    vararg commandArgs: String
): ProcessCapture {
    // A virtualenv keeps its executables in a differently named directory on Windows.
    val scriptsDirName = if (Os.isWindows) "Scripts" else "bin"
    val command = virtualEnvDir.resolve(scriptsDirName).resolve(commandName)

    // Only use the resolved Windows executable when actually running on Windows.
    val windowsExecutable = Os.resolveWindowsExecutable(command)
    val resolvedCommand = if (windowsExecutable != null && Os.isWindows) windowsExecutable else command

    // TODO: Maybe work around long shebang paths in generated scripts within a virtualenv by calling the Python
    //       executable in the virtualenv directly, see https://github.com/pypa/virtualenv/issues/997.
    return ProcessCapture(workingDir, resolvedCommand.path, *commandArgs).also { process ->
        logger.debug { process.stdout }
    }
}
/**
 * Run the version check of the [VirtualEnv] tool. The [definitionFiles] are not looked at.
 * NOTE(review): presumably the framework calls this once before any definition file is resolved; confirm against
 * the PackageManager base class.
 */
override fun beforeResolution(definitionFiles: List<File>) = VirtualEnv.checkVersion()
/**
 * Resolve the dependencies of the project defined by [definitionFile] and return a single [ProjectAnalyzerResult]
 * with one "install" scope. The [labels] are not used.
 *
 * A temporary virtualenv is created for the resolution. It is always removed again, also if resolving the project
 * metadata or the dependencies fails with an exception.
 */
override fun resolveDependencies(definitionFile: File, labels: Map<String, String>): List<ProjectAnalyzerResult> {
    // For an overview, dependency resolution involves the following steps:
    // 1. Get metadata about the local project via `python setup.py`.
    // 2. Get the hierarchy of dependencies via python-inspector.
    // 3. Get additional remote package metadata via PyPI JSON.

    val workingDir = definitionFile.parentFile

    // Try to determine the Python version the project requires.
    val pythonMajorVersion = PythonVersion.getPythonMajorVersion(workingDir)

    // NOTE(review): setupVirtualEnv() may fall back to the other Python major version, but the originally detected
    // version is still the one passed on to getInstallDependencies() below. Confirm whether that is intended.
    val virtualEnvDir = setupVirtualEnv(workingDir, definitionFile, pythonMajorVersion)

    try {
        val project = getProjectBasics(definitionFile, virtualEnvDir)
        val (packages, installDependencies) = getInstallDependencies(definitionFile, virtualEnvDir, pythonMajorVersion)

        // TODO: Handle "extras" and "tests" dependencies.
        val scopes = sortedSetOf(
            Scope("install", installDependencies)
        )

        return listOf(ProjectAnalyzerResult(project.copy(scopeDependencies = scopes), packages))
    } finally {
        // Remove the virtualenv by simply deleting the directory, no matter whether the resolution succeeded.
        virtualEnvDir.safeDeleteRecursively()
    }
}
/**
 * Create the [Project] for [definitionFile] with basic metadata but without any dependencies. If there is a
 * "setup.py" file next to the definition file, metadata is queried from it by running it with the "python" executable
 * of the virtualenv in [virtualEnvDir]. For definition files other than "setup.py", name and version information is
 * additionally derived from the file name, its parent directory and the VCS revision.
 *
 * An [IllegalArgumentException] is thrown if no project name can be determined.
 */
private fun getProjectBasics(definitionFile: File, virtualEnvDir: File): Project {
    val workingDir = definitionFile.parentFile

    val authors = sortedSetOf<String>()
    val declaredLicenses = sortedSetOf<String>()

    // Query a single metadata value from "setup.py", see
    // https://docs.python.org/3.8/distutils/setupscript.html#additional-meta-data.
    fun getSetupPyMetadata(option: String): String? {
        val process = runInVirtualEnv(virtualEnvDir, workingDir, "python", "setup.py", option)
        val metadata = process.stdout.trim()

        return if (process.isError || metadata == "UNKNOWN") null else metadata
    }

    // First try to get metadata from "setup.py" in any case, even for "requirements.txt" projects.
    val (setupName, setupVersion, setupHomepage) = if (workingDir.resolve("setup.py").isFile) {
        authors += parseAuthorString(getSetupPyMetadata("--author"))

        getLicenseFromLicenseField(getSetupPyMetadata("--license"))?.let { declaredLicenses += it }

        getSetupPyMetadata("--classifiers")?.lines()?.forEach { classifier ->
            getLicenseFromClassifier(classifier)?.let { declaredLicenses += it }
        }

        Triple(
            getSetupPyMetadata("--name").orEmpty(),
            getSetupPyMetadata("--version").orEmpty(),
            getSetupPyMetadata("--url").orEmpty()
        )
    } else {
        Triple("", "", "")
    }

    // Try to get additional information from any "requirements.txt" file.
    val (requirementsName, requirementsVersion, requirementsSuffix) = if (definitionFile.name != "setup.py") {
        val pythonVersionLines = definitionFile.readLines().filter { "python_version" in it }
        if (pythonVersionLines.isNotEmpty()) {
            logger.debug {
                "Some dependencies have Python version requirements:\n$pythonVersionLines"
            }
        }

        // In case of "requirements*.txt" there is no metadata at all available, so use the parent directory name
        // plus what "*" expands to as the project name and the VCS revision, if any, as the project version.
        val suffix = definitionFile.name.removePrefix("requirements").removeSuffix(".txt")
        val name = definitionFile.parentFile.name + suffix
        val version = VersionControlSystem.getCloneInfo(workingDir).revision

        Triple(name, version, suffix)
    } else {
        Triple("", "", "")
    }

    // Amend information from "setup.py" with that from "requirements.txt".
    val projectName = if (setupName.isNotEmpty()) {
        if (requirementsName.isNotEmpty()) "$setupName-requirements$requirementsSuffix" else setupName
    } else {
        require(requirementsName.isNotEmpty()) { "Unable to determine a project name for '$definitionFile'." }

        // In case of only a requirements file without further metadata, use the relative path to the analyzer
        // root as a unique project name.
        definitionFile.relativeTo(analysisRoot).invariantSeparatorsPath
    }

    val projectVersion = setupVersion.ifEmpty { requirementsVersion }

    return Project(
        id = Identifier(
            type = managerName,
            namespace = "",
            name = projectName,
            version = projectVersion
        ),
        definitionFilePath = VersionControlSystem.getPathInfo(definitionFile).path,
        authors = authors,
        declaredLicenses = declaredLicenses,
        vcs = VcsInfo.EMPTY,
        vcsProcessed = processProjectVcs(workingDir, VcsInfo.EMPTY, setupHomepage),
        homepageUrl = setupHomepage
    )
}
/**
 * Determine the dependencies of the project defined by [definitionFile] by running python-inspector for the given
 * [pythonMajorVersion]. Return the set of all [Package]s together with the tree of [PackageReference]s. Package
 * metadata is retrieved from PyPI and amended with the locally available metadata of the packages installed in the
 * virtualenv in [virtualEnvDir].
 *
 * An [IllegalArgumentException] is thrown for an unsupported [pythonMajorVersion]. Failures of python-inspector are
 * logged and rethrown. The temporary directory for the python-inspector output is removed in any case.
 */
private fun getInstallDependencies(
    definitionFile: File,
    virtualEnvDir: File,
    pythonMajorVersion: Int
): Pair<SortedSet<Package>, SortedSet<PackageReference>> {
    val packages = sortedSetOf<Package>()
    val installDependencies = sortedSetOf<PackageReference>()

    val workingDir = definitionFile.parentFile

    // Validate the Python version first so that no temporary directory is created for an unsupported version.
    val pythonVersion = when (pythonMajorVersion) {
        2 -> "2.7" // 2.7 is the only 2.x version supported by python-inspector.
        3 -> "3.10" // 3.10 is the version currently used in the ORT Docker image.
        else -> throw IllegalArgumentException("Unsupported Python major version '$pythonMajorVersion'.")
    }

    val outputDir = createOrtTempDir()
    val jsonFile = outputDir.resolve("python-inspector.json")

    val fullDependencyTree = try {
        logger.info {
            "Resolving dependencies for '${definitionFile.absolutePath}' with Python version '$pythonVersion'."
        }

        runCatching {
            try {
                PythonInspector.run(
                    workingDir = workingDir,
                    outputFile = jsonFile.absolutePath,
                    definitionFile = definitionFile,
                    pythonVersion = pythonVersion.replace(".", "")
                )
            } finally {
                workingDir.resolve(".cache").safeDeleteRecursively(force = true)
            }
        }.onFailure { e ->
            e.showStackTrace()

            logger.error {
                "Unable to determine dependencies for definition file '${definitionFile.absolutePath}': " +
                    e.collectMessages()
            }
        }.getOrThrow()

        jsonMapper.readTree(jsonFile)
    } finally {
        // Always remove the temporary output directory, also if python-inspector or parsing its output failed.
        outputDir.safeDeleteRecursively(force = true)
    }

    // Get the locally available metadata for all installed packages as a fallback.
    val installedPackages = getInstalledPackagesWithLocalMetaData(virtualEnvDir, workingDir).associateBy { it.id }

    val projectDependencies = fullDependencyTree.filterNot {
        isPhonyDependency(
            it["package_name"].textValue(),
            it["installed_version"].textValueOrEmpty()
        )
    }

    val allIds = sortedSetOf<Identifier>()
    parseDependencies(projectDependencies, allIds, installDependencies)

    // Enrich the package templates with additional metadata from PyPI.
    allIds.mapTo(packages) { id ->
        // TODO: Retrieve metadata of package not hosted on PyPI by querying the respective repository.
        getPackageFromPyPi(id).enrichWith(installedPackages[id])
    }

    return packages to installDependencies
}
/**
 * Return the binary artifact described by the entries of [releaseNode]. [RemoteArtifact.EMPTY] is returned if
 * [releaseNode] is `null` or empty, or if the chosen entry lacks a "url" or "md5_digest".
 */
private fun getBinaryArtifact(releaseNode: ArrayNode?): RemoteArtifact {
    releaseNode ?: return RemoteArtifact.EMPTY

    // Prefer python wheels and fall back to the first entry (probably a sdist). A release without any files has no
    // first entry, so guard against that instead of dereferencing a missing node.
    val binaryArtifact = releaseNode.find { it["packagetype"]?.textValue() == "bdist_wheel" }
        ?: releaseNode.firstOrNull()
        ?: return RemoteArtifact.EMPTY

    val url = binaryArtifact["url"]?.textValue() ?: return RemoteArtifact.EMPTY
    val hash = binaryArtifact["md5_digest"]?.textValue()?.let { Hash.create(it) } ?: return RemoteArtifact.EMPTY

    return RemoteArtifact(url, hash)
}
/**
 * Return the source artifact described by the "sdist" entries of [releaseNode]. [RemoteArtifact.EMPTY] is returned if
 * [releaseNode] is `null`, if it contains no "sdist" entry, or if the chosen entry lacks a "url" or "md5_digest".
 */
private fun getSourceArtifact(releaseNode: ArrayNode?): RemoteArtifact {
    releaseNode ?: return RemoteArtifact.EMPTY

    // Collect the candidates once; entries without a "packagetype" are skipped instead of causing an exception.
    val sourceArtifacts = releaseNode.filter { it["packagetype"]?.textValue() == "sdist" }

    // Prefer a ".tar.bz2" archive and fall back to the first source distribution.
    val sourceArtifact = sourceArtifacts.find { it["filename"]?.textValue()?.endsWith(".tar.bz2") == true }
        ?: sourceArtifacts.firstOrNull()
        ?: return RemoteArtifact.EMPTY

    val url = sourceArtifact["url"]?.textValue() ?: return RemoteArtifact.EMPTY
    val hash = sourceArtifact["md5_digest"]?.textValue()?.let { Hash.create(it) } ?: return RemoteArtifact.EMPTY

    return RemoteArtifact(url, hash)
}
/**
 * Return the authors from the "author" field of [pkgInfo], or an empty set if there is no valid author.
 */
private fun parseAuthors(pkgInfo: JsonNode): SortedSet<String> {
    val author = pkgInfo["author"]?.textValue()

    return parseAuthorString(author)
}
/**
 * Return a set that contains [author] if it is a valid author name, or an empty set otherwise.
 */
private fun parseAuthorString(author: String?): SortedSet<String> {
    val authors = sortedSetOf<String>()

    if (author != null && isValidAuthor(author)) authors += author

    return authors
}
/**
 * Return whether the given [author] string is an actual author name. Some non-null strings merely indicate that no
 * author information is available: setup.py files can contain empty strings, and the "pip show" command prints the
 * string "None" in this case.
 */
private fun isValidAuthor(author: String): Boolean {
    if (author.isBlank()) return false

    return author != "None"
}
/**
 * Return the declared licenses of a package as given by the top-level "license" field and the license classifiers in
 * the "classifiers" field of [pkgInfo].
 */
private fun getDeclaredLicenses(pkgInfo: JsonNode): SortedSet<String> {
    val licenseFromField = getLicenseFromLicenseField(pkgInfo["license"]?.textValue())

    val licensesFromClassifiers = pkgInfo["classifiers"]
        ?.mapNotNull { classifier -> getLicenseFromClassifier(classifier.textValue()) }
        .orEmpty()

    return sortedSetOf<String>().apply {
        licenseFromField?.let { add(it) }
        addAll(licensesFromClassifiers)
    }
}
/**
 * Return the license declared by the license field [value], or `null` if the value is missing, blank, "UNKNOWN" or
 * not a "short string", i.e. longer than the maximum number of characters or spanning more than one line.
 */
private fun getLicenseFromLicenseField(value: String?): String? {
    val license = value?.takeUnless { it.isBlank() || it == "UNKNOWN" } ?: return null

    // See https://docs.python.org/3/distutils/setupscript.html#additional-meta-data for what a "short string" is.
    if (license.length > SHORT_STRING_MAX_CHARS || license.lines().size != 1) return null

    // Apply a work-around for projects that declare licenses in classifier-syntax in the license field.
    return getLicenseFromClassifier(license) ?: license
}
/**
 * Return the license named by the given [classifier], or `null` if the classifier is not a license classifier or does
 * not name a specific license.
 */
private fun getLicenseFromClassifier(classifier: String): String? {
    // Example license classifier (also see https://pypi.org/classifiers/):
    // "License :: OSI Approved :: GNU Library or Lesser General Public License (LGPL)"
    val licenseClassifiers = listOf("License", "OSI Approved")
    val parts = classifier.split(" :: ").map { it.trim() }

    if (parts.first() !in licenseClassifiers) return null

    // The last part is only a license if it is not itself one of the generic category names.
    val candidate = parts.last()

    return if (candidate in licenseClassifiers) null else candidate
}
/**
 * Create a virtualenv outside of [workingDir] and install the dependencies declared in [definitionFile] into it.
 * The installation is first tried with [pythonMajorVersion]. If that fails, the virtualenv is recreated with the
 * other Python major version and the installation is tried once more. Return the directory of the virtualenv that was
 * used for the successful installation.
 *
 * If an installation attempt throws, the virtualenv created for that attempt is deleted before the exception is
 * propagated, so that no temporary directory is left behind.
 */
private fun setupVirtualEnv(workingDir: File, definitionFile: File, pythonMajorVersion: Int): File {
    // Run the action and delete the given virtualenv directory if the action throws.
    fun <T> cleaningUpOnFailure(virtualEnvDir: File, action: () -> T): T =
        runCatching { action() }.onFailure { virtualEnvDir.safeDeleteRecursively() }.getOrThrow()

    // Create an out-of-tree virtualenv.
    logger.info { "Creating a virtualenv for the '${workingDir.name}' project directory..." }
    logger.info { "Trying to install dependencies using Python $pythonMajorVersion..." }

    val firstVirtualEnvDir = createVirtualEnv(workingDir, pythonMajorVersion)
    val install = cleaningUpOnFailure(firstVirtualEnvDir) {
        installDependencies(workingDir, definitionFile, firstVirtualEnvDir)
    }

    if (!install.isError) {
        logger.info {
            "Successfully installed dependencies for project '$definitionFile' using Python $pythonMajorVersion."
        }

        return firstVirtualEnvDir
    }

    logger.debug {
        // pip writes the real error message to stdout instead of stderr.
        "First try to install dependencies using Python $pythonMajorVersion failed with:\n${install.stdout}"
    }

    // The first virtualenv is of no use anymore, so remove it before doing anything else that might throw.
    firstVirtualEnvDir.safeDeleteRecursively()

    // If there was a problem maybe the required Python version was detected incorrectly, so simply try again
    // with the other version.
    val fallbackPythonVersion = when (pythonMajorVersion) {
        2 -> 3
        3 -> 2
        else -> throw IllegalArgumentException("Unsupported Python version $pythonMajorVersion.")
    }

    logger.info { "Falling back to trying to install dependencies using Python $fallbackPythonVersion..." }

    val fallbackVirtualEnvDir = createVirtualEnv(workingDir, fallbackPythonVersion)
    cleaningUpOnFailure(fallbackVirtualEnvDir) {
        installDependencies(workingDir, definitionFile, fallbackVirtualEnvDir).requireSuccess()
    }

    logger.info {
        "Successfully installed dependencies for project '$definitionFile' using Python $fallbackPythonVersion."
    }

    return fallbackVirtualEnvDir
}
/**
 * Create a virtualenv for the given [pythonVersion] in a new temporary directory and return that directory. The
 * "virtualenv" command is run with [workingDir] as its working directory.
 *
 * An [IllegalArgumentException] is thrown if no interpreter for [pythonVersion] is available. If creating the
 * virtualenv fails, the temporary directory is deleted again before the exception is propagated.
 */
private fun createVirtualEnv(workingDir: File, pythonVersion: Int): File {
    // Look up the interpreter first so that no temporary directory is created when there is none.
    val pythonInterpreter = requireNotNull(PythonVersion.getPythonInterpreter(pythonVersion)) {
        "No Python interpreter found for version $pythonVersion."
    }

    val virtualEnvDir = createOrtTempDir("${workingDir.name}-virtualenv")

    runCatching {
        ProcessCapture(workingDir, "virtualenv", virtualEnvDir.path, "-p", pythonInterpreter).requireSuccess()
    }.onFailure {
        virtualEnvDir.safeDeleteRecursively()
    }.getOrThrow()

    return virtualEnvDir
}
/**
 * Install the pinned version of pip and then the dependencies declared in [definitionFile] into the virtualenv in
 * [virtualEnvDir], running all commands from [workingDir]. A failure to install pip itself results in an exception,
 * whereas a failure to install the dependencies is only logged. The captured process of the dependency installation
 * is returned so that the caller can check it for errors.
 */
private fun installDependencies(workingDir: File, definitionFile: File, virtualEnvDir: File): ProcessCapture {
    val pinnedPip = "pip==$PIP_VERSION"

    // Ensure to have installed a version of pip that is known to work for us.
    val pipInstallation = if (Os.isWindows) {
        // On Windows, in-place pip up- / downgrades require pip to be wrapped by "python -m", see
        // https://github.com/pypa/pip/issues/1299.
        runInVirtualEnv(
            virtualEnvDir, workingDir, "python", "-m", command(workingDir),
            *TRUSTED_HOSTS, "install", pinnedPip
        )
    } else {
        runPipInVirtualEnv(virtualEnvDir, workingDir, "install", pinnedPip)
    }

    pipInstallation.requireSuccess()

    // TODO: Find a way to make installation of packages with native extensions work on Windows where often the
    //       appropriate compiler is missing / not set up, e.g. by using pre-built packages from
    //       http://www.lfd.uci.edu/~gohlke/pythonlibs/
    val whatToInstall = if (definitionFile.name == "setup.py") {
        // Note that this only installs required "install" dependencies, not "extras" or "tests" dependencies.
        arrayOf(".")
    } else {
        // In "setup.py"-speak, "requirements.txt" just contains required "install" dependencies.
        arrayOf("-r", definitionFile.name)
    }

    val dependencyInstallation =
        runPipInVirtualEnv(virtualEnvDir, workingDir, "install", *INSTALL_OPTIONS, *whatToInstall)

    // TODO: Consider logging a warning instead of an error if the command is run on a file that likely belongs to
    //       a test.
    if (dependencyInstallation.isError) logger.error { dependencyInstallation.errorMessage }

    return dependencyInstallation
}
/**
 * Recursively convert the given [dependencies] nodes to [PackageReference]s which are added to [installDependencies].
 * The [Identifier]s of all visited packages, including transitive ones, are added to [allIds].
 *
 * A node without an "installed_version" gets an empty version, in line with how the version is read when filtering
 * phony dependencies. A node without a "dependencies" entry is treated as having no dependencies.
 */
private fun parseDependencies(
    dependencies: Iterable<JsonNode>,
    allIds: SortedSet<Identifier>,
    installDependencies: SortedSet<PackageReference>
) {
    dependencies.forEach { dependency ->
        val id = Identifier(
            type = "PyPI",
            namespace = "",
            name = dependency["package_name"].textValue().normalizePackageName(),
            version = dependency["installed_version"].textValueOrEmpty()
        )

        val packageRef = PackageReference(id)
        allIds += id
        installDependencies += packageRef

        // Only descend if there actually is a node with child dependencies.
        dependency["dependencies"]?.let { parseDependencies(it, allIds, packageRef.dependencies) }
    }
}
/**
 * Return the [Package] for the given [id] with metadata retrieved from the JSON API of PyPI. If the metadata cannot
 * be downloaded or processed, a warning that includes the cause is logged and an otherwise empty package with the
 * given [id] is returned.
 */
private fun getPackageFromPyPi(id: Identifier): Package {
    // See https://wiki.python.org/moin/PyPIJSON.
    val url = "https://pypi.org/pypi/${id.name}/${id.version}/json"

    return OkHttpClientHelper.downloadText(url).mapCatching { json ->
        val pkgData = jsonMapper.readTree(json)
        val pkgInfo = pkgData["info"]
        val pkgRelease = pkgData["urls"] as? ArrayNode

        val homepageUrl = pkgInfo["home_page"]?.textValue().orEmpty()

        val declaredLicenses = getDeclaredLicenses(pkgInfo)
        var declaredLicensesProcessed = DeclaredLicenseProcessor.process(declaredLicenses)

        // Python's classifiers only support a coarse license declaration of "BSD License". So if there is another
        // more specific declaration of a BSD license, align on that one.
        if (GENERIC_BSD_LICENSE in declaredLicensesProcessed.unmapped) {
            declaredLicensesProcessed.spdxExpression?.decompose()?.singleOrNull {
                it is SpdxLicenseIdExpression && it.isValid() && it.toString().startsWith("BSD-")
            }?.let { license ->
                logger.debug { "Mapping '$GENERIC_BSD_LICENSE' to '$license' for ${id.toCoordinates()}." }

                declaredLicensesProcessed = declaredLicensesProcessed.copy(
                    mapped = declaredLicensesProcessed.mapped + mapOf(GENERIC_BSD_LICENSE to license),
                    unmapped = declaredLicensesProcessed.unmapped - GENERIC_BSD_LICENSE
                )
            }
        }

        // The "project_urls" entry may be absent, so access it null-safely to not lose all other metadata.
        val projectUrls = pkgInfo["project_urls"]
        val vcsFallbackUrls = listOfNotNull(
            homepageUrl,
            pkgInfo["project_url"]?.textValue(),
            projectUrls?.get("Code")?.textValue(),
            projectUrls?.get("Homepage")?.textValue(),
            projectUrls?.get("Source")?.textValue(),
            projectUrls?.get("Source Code")?.textValue()
        ).toTypedArray()

        Package(
            id = id,
            homepageUrl = homepageUrl,
            description = pkgInfo["summary"]?.textValue().orEmpty(),
            authors = parseAuthors(pkgInfo),
            declaredLicenses = declaredLicenses,
            declaredLicensesProcessed = declaredLicensesProcessed,
            binaryArtifact = getBinaryArtifact(pkgRelease),
            sourceArtifact = getSourceArtifact(pkgRelease),
            vcs = VcsInfo.EMPTY,
            vcsProcessed = processPackageVcs(VcsInfo.EMPTY, *vcsFallbackUrls)
        )
    }.onFailure {
        // Include the cause as otherwise there is no way to tell download problems from processing problems.
        logger.warn {
            "Unable to retrieve PyPI metadata for package '${id.toCoordinates()}': ${it.collectMessages()}"
        }
    }.getOrDefault(Package.EMPTY.copy(id = id))
}
/**
 * Return all packages that are installed in the virtualenv in [virtualEnvDir], with the metadata that is available
 * locally via 'pip show'. Commands are run from [workingDir]. An empty list is returned if no packages are installed.
 */
private fun getInstalledPackagesWithLocalMetaData(virtualEnvDir: File, workingDir: File): List<Package> {
    val allPackages = listAllInstalledPackages(virtualEnvDir, workingDir)

    // 'pip show' fails if it is not given any package name, so there is nothing to query in this case.
    if (allPackages.isEmpty()) return emptyList()

    // Invoking 'pip show' once for each package separately is too slow, thus obtain the output for all packages
    // and split it at the separator lines: "---".
    val output = runInVirtualEnv(
        virtualEnvDir,
        workingDir,
        "pip",
        "show",
        "--verbose",
        *allPackages.map { it.name }.toTypedArray()
    ).requireSuccess().stdout

    return output.normalizeLineBreaks().split("\n---\n").map { parsePipShowOutput(it) }
}
/**
 * Return the [Identifier]s of all packages installed in the virtualenv in [virtualEnvDir], as reported by the command
 * 'pip list' which is run from [workingDir]. Phony dependencies are left out.
 */
private fun listAllInstalledPackages(virtualEnvDir: File, workingDir: File): Set<Identifier> {
    val pipList = runInVirtualEnv(virtualEnvDir, workingDir, "pip", "list", "--format", "json").requireSuccess()
    val installedPackages = jsonMapper.readTree(pipList.stdout) as ArrayNode

    val ids = mutableSetOf<Identifier>()

    for (installedPackage in installedPackages) {
        val name = installedPackage["name"].textValue()
        val version = installedPackage["version"].textValue()
        val id = Identifier("PyPI", "", name, version)

        if (!isPhonyDependency(name, version)) ids += id
    }

    return ids
}
/**
 * Parse the [output] of 'pip show <package-name> --verbose' for a single package to a [Package]. Lines of the form
 * "key: value" start a new field, and lines that start with a space are regarded as further values of the field on
 * the preceding lines. The "Name" and "Version" fields are mandatory.
 */
private fun parsePipShowOutput(output: String): Package {
    val fields = mutableMapOf<String, MutableList<String>>()
    var currentKey: String? = null

    for (line in output.lines()) {
        if (line.startsWith(" ")) {
            // An indented line continues the most recently seen field, if any.
            currentKey?.let {
                fields.getOrPut(it) { mutableListOf() } += line.trim()
            }

            continue
        }

        // Skip lines that neither continue a field nor start a new one.
        val separatorIndex = line.indexOf(":")
        if (separatorIndex < 0) continue

        val key = line.substring(0, separatorIndex)
        val value = line.substring(separatorIndex + 1).trim()

        if (value.isNotEmpty()) {
            fields.getOrPut(key) { mutableListOf() } += value
        }

        currentKey = key
    }

    val id = Identifier(
        type = "PyPI",
        namespace = "",
        name = fields.getValue("Name").single().normalizePackageName(),
        version = fields.getValue("Version").single()
    )

    val declaredLicenses = sortedSetOf<String>()

    val licenseLines = fields["License"].orEmpty()
    getLicenseFromLicenseField(licenseLines.firstOrNull())?.let { declaredLicenses += it }

    val additionalLicenseLines = licenseLines.drop(1)
    if (additionalLicenseLines.isNotEmpty()) {
        logger.warn {
            "The 'License' field of package '${id.toCoordinates()}' is supposed to be a short string but it " +
                "contains the following additional lines which will be ignored:"
        }

        for (additionalLine in additionalLicenseLines) {
            logger.warn { additionalLine }
        }
    }

    fields["Classifiers"].orEmpty().forEach { classifier ->
        getLicenseFromClassifier(classifier)?.let { declaredLicenses += it }
    }

    val authors = parseAuthorString(fields["Author"]?.singleOrNull())

    return Package(
        id = id,
        description = fields["Summary"]?.single().orEmpty(),
        homepageUrl = fields["Home-page"]?.single().orEmpty(),
        authors = authors,
        declaredLicenses = declaredLicenses,
        binaryArtifact = RemoteArtifact.EMPTY,
        sourceArtifact = RemoteArtifact.EMPTY,
        vcs = VcsInfo.EMPTY
    )
}
}
/**
 * Return a [Package] that takes each property from this package unless it is blank or empty, in which case the
 * property is taken from [other] instead. The processed declared licenses follow the choice made for the declared
 * licenses. If [other] is `null`, this package is returned as is.
 */
private fun Package.enrichWith(other: Package?): Package {
    other ?: return this

    val hasOwnLicenses = !declaredLicenses.isEmpty()

    return Package(
        id = id,
        homepageUrl = homepageUrl.ifBlank { other.homepageUrl },
        description = description.ifBlank { other.description },
        authors = if (authors.isEmpty()) other.authors else authors,
        declaredLicenses = if (hasOwnLicenses) declaredLicenses else other.declaredLicenses,
        declaredLicensesProcessed = if (hasOwnLicenses) declaredLicensesProcessed else other.declaredLicensesProcessed,
        binaryArtifact = if (binaryArtifact == RemoteArtifact.EMPTY) other.binaryArtifact else binaryArtifact,
        sourceArtifact = if (sourceArtifact == RemoteArtifact.EMPTY) other.sourceArtifact else sourceArtifact,
        vcs = if (vcs == VcsInfo.EMPTY) other.vcs else vcs,
        vcsProcessed = if (vcsProcessed == VcsInfo.EMPTY) other.vcsProcessed else vcsProcessed
    )
}
/**
 * Matches any run of the characters "-", "_" and "." in a package name.
 */
private val PACKAGE_NAME_SEPARATOR_RUNS = Regex("[-_.]+")

/**
 * Normalize a PyPI package name to be lowercase and hyphenated as per PEP 426 and 503:
 *
 * PEP 426 (https://www.python.org/dev/peps/pep-0426/#name):
 *     "All comparisons of distribution names MUST be case-insensitive,
 *      and MUST consider hyphens and underscores to be equivalent".
 *
 * PEP 503 (https://www.python.org/dev/peps/pep-0503/#normalized-names):
 *      "This PEP references the concept of a "normalized" project name.
 *       As per PEP 426 the only valid characters in a name are the ASCII alphabet,
 *       ASCII numbers, ., -, and _. The name should be lowercased with all runs
 *       of the characters ., -, or _ replaced with a single - character."
 */
private fun String.normalizePackageName(): String {
    val hyphenated = PACKAGE_NAME_SEPARATOR_RUNS.replace(this, "-")

    return hyphenated.lowercase()
}