diff --git a/ecosystemDataManager/ecosystemDataManager.py b/ecosystemDataManager/ecosystemDataManager.py index ccb4464..98a318e 100644 --- a/ecosystemDataManager/ecosystemDataManager.py +++ b/ecosystemDataManager/ecosystemDataManager.py @@ -172,7 +172,10 @@ def evaluatePackages(self): if packageDependency.getLatestVersion().getDatetime(): iregular = package.evaluate(packageDependency) if iregular: - self.addDictKey(iregularPackages, package) + try: + iregularPackages[package] += 1 + except Exception as e: + iregularPackages[package] = 1 print("[" + str(evaluated) + "/" + str(size) + "]", package, "-->", packageDependency) break evaluated += 1 @@ -317,8 +320,8 @@ def getLicenses(self): licenses = list(licenses) return licenses - def getMostPopularLicenses(self, size = None): - distribution = {group.value: {} for group in Group} + def getMostPopularLicenses(self, size = None, unknown = False, known = False): + distribution = {} versionsHasLicenses = self.get("VersionsHasLicenses") licensesHasGroup = self.get("LicensesHasGroup") for i in range(len(versionsHasLicenses)): @@ -326,12 +329,30 @@ def getMostPopularLicenses(self, size = None): for j in range(len(version)): license = version[j] group = licensesHasGroup[i][j] - self.addDictKey(distribution[group], license) - for group in distribution.keys(): - distribution[group] = sorted(distribution[group].items(), key=lambda x: x[1], reverse = True) - if size: - distribution[group] = distribution[group][:size] - return distribution + if unknown: + if group == Group.KNOWN.value: + continue + if group == Group.FILE.value: + continue + elif known: + if group == Group.UNDEFINED.value: + continue + if group == Group.COPYRIGHT.value: + continue + if group == Group.DUBIOUS.value: + continue + if group == Group.FILE.value: + continue + if group == Group.UNKNOWN.value: + continue + try: + distribution[license] += 1 + except Exception as e: + distribution[license] = 1 + mostPopularLicenses = sorted(distribution.items(), key=lambda 
x: x[1], reverse = True) + if size: + mostPopularLicenses = mostPopularLicenses[:size] + return mostPopularLicenses def proportion(self): iregularPackages = 0 @@ -393,15 +414,19 @@ def groupsProportion(self): versionsHasLicenses = self.get("LicensesHasGroup") versionsHasDatetime = self.get("VersionsHasDatetime") distribution = {} + distribution["-1"] = 0 for i in range(len(versionsHasLicenses)): version = versionsHasLicenses[i] datetime = versionsHasDatetime[i] if not datetime: continue if not version: - self.addDictKey(distribution, "-1") + distribution["-1"] += 1 for group in version: - self.addDictKey(distribution, group) + try: + distribution[group] += 1 + except Exception as e: + distribution[group] = 1 return distribution def licensesProportion(self): @@ -414,7 +439,10 @@ def licensesProportion(self): if not datetime: continue size = len(version) - self.addDictKey(distribution, size) + try: + distribution[size] += 1 + except Exception as e: + distribution[size] = 1 return distribution def groupsDependencies(self): @@ -475,8 +503,13 @@ def groupsEvolution(self): adjacencies[licenseFrom.getGroup().value][licenseTo.getGroup().value] += 1 return adjacencies - def extractEvolutionPatterns(self, size = None): - adjacencies = {groupFrom.name: {groupTo.name: {} for groupTo in Group} for groupFrom in Group} + def resolveGroup(self, group): + if group == Group.UNDEFINED or group == Group.COPYRIGHT or group == Group.DUBIOUS or group == Group.UNKNOWN: + return Group.UNKNOWN + return group + + def extractEvolutionPatterns(self): + adjacencies = {groupFrom.name:{groupTo.name:0 for groupTo in Group} for groupFrom in Group} for package in self.getPackages(): history = package.getHistory() for i in range(len(history) - 1): @@ -491,11 +524,11 @@ def extractEvolutionPatterns(self, size = None): if not licensesFrom and licensesTo: for licenseTo in licensesTo: group = licenseTo.getGroup() - self.addDictKey(adjacencies[Group.NONE.name][group.name], "none" + "->" + str(licenseTo)) 
+ self.addDictKey(adjacencies[Group.UNKNOWN.name][self.resolveGroup(group)], "none" + "->" + str(licenseTo)) elif not licensesTo: for licenseFrom in licensesFrom: group = licenseFrom.getGroup() - self.addDictKey(adjacencies[group.name][Group.NONE.name], str(licenseFrom) + "->" + "none") + self.addDictKey(adjacencies[self.resolveGroup(group)][Group.UNKNOWN.name], str(licenseFrom) + "->" + "none") else: for licenseFrom in licensesFrom: for licenseTo in licensesTo: @@ -503,12 +536,7 @@ def extractEvolutionPatterns(self, size = None): groupTo = licenseTo.getGroup() if licenseFrom == licenseTo: continue - self.addDictKey(adjacencies[groupFrom.name][groupTo.name], str(licenseFrom) + "->" + str(licenseTo)) - for groupFrom in adjacencies.keys(): - for groupTo in adjacencies.keys(): - adjacencies[groupFrom][groupTo] = sorted(adjacencies[groupFrom][groupTo].items(), key=lambda x: x[1], reverse = True) - if size: - adjacencies[groupFrom][groupTo] = adjacencies[groupFrom][groupTo][:size] + self.addDictKey(adjacencies[self.resolveGroup(groupFrom)][self.resolveGroup(groupTo)], str(licenseFrom) + "->" + str(licenseTo)) return adjacencies def addDictKey(self, dictionary, key): @@ -535,9 +563,9 @@ def evaluateInLicenses(self, inLicenses): return True if inLicense.getGroup() == Group.COPYRIGHT: return True - if inLicense.getGroup() == Group.UNAPPROVED: + if inLicense.getGroup() == Group.UNKNOWN: return True - if inLicense.getGroup() == Group.MISUSED: + if inLicense.getGroup() == Group.DUBIOUS: return True return False diff --git a/generateDOTVersionIrregularDependenciesGraph.py b/generateDOTVersionIrregularDependenciesGraph.py index 33e4989..deef8d4 100644 --- a/generateDOTVersionIrregularDependenciesGraph.py +++ b/generateDOTVersionIrregularDependenciesGraph.py @@ -4,6 +4,10 @@ VISITED = [] FILE = None +""" +Write to the file the structure to be read by the DOT generator +To generate a PDF from the DOT file use the command: dot -Tpdf > file.pdf +""" def generate(version): if version in VISITED: return @@ 
-24,6 +28,9 @@ def generate(version): FILE.write("\"" + str(outV) + "\\n" + outL + "\"" + "->" + "\"" + str(inV) + "\\n" + inL + "\"" + "[color=" + color + "]" + ";") generate(inV) +""" +Generate a directed graph from a package of the ecosystem +""" def generateDot(version): FILE.write("digraph graphname {") generate(version) diff --git a/generateGEXFContextGraph.py b/generateGEXFContextGraph.py index 8c378b4..b3c5a4a 100644 --- a/generateGEXFContextGraph.py +++ b/generateGEXFContextGraph.py @@ -87,6 +87,9 @@ def getAttributes(entity): attributes["shape"] = "disc" return attributes +""" +Generate a directed graph for a package in the ecosystem; this graph must be read by Gephi +""" def generateGraph(entity): generateDependencies(entity) generateOcurrences(entity) diff --git a/generateGEXFVersionContextGraph.py b/generateGEXFVersionContextGraph.py index db89d59..63bc538 100644 --- a/generateGEXFVersionContextGraph.py +++ b/generateGEXFVersionContextGraph.py @@ -95,6 +95,9 @@ def getAttributes(entity): attributes["shape"] = "disc" return attributes +""" +Generate a directed graph of a package with all its versions in the ecosystem; this graph must be read by Gephi +""" def generateGraph(version): generateDependencies(version) generateOcurrences(version) diff --git a/plotDistributions.py b/plotDistributions.py index d3ad06f..1dc38a5 100644 --- a/plotDistributions.py +++ b/plotDistributions.py @@ -6,6 +6,13 @@ import math from ecosystemDataManager.ecosystemDataManager import EcosystemDataManager +""" +Functions to plot charts; every plot is an HTML file. +""" + +""" +Function to plot a histogram; the required params are a vector (the histogram values) and a name for the histogram. +""" def plotHistogram(vector, name_histogram): trace = go.Histogram( name='Results', @@ -19,6 +26,9 @@ def plotHistogram(vector, name_histogram): data = [trace] plotly.offline.plot(data, filename=name_histogram) +""" +Function to plot a range of histograms; the params are a vector of vectors and a name for the histogram. 
+""" def plotHistograms(vectors, name_histogram): data = [] for vector in vectors: @@ -34,6 +44,9 @@ def plotHistograms(vectors, name_histogram): data.append(trace) plotly.offline.plot(data, filename=name_histogram) +""" +Function to plot a boxplot; the params are a vector and a name for the boxplot. +""" def plotBoxPlot(vector, name_boxplot): trace0 = go.Box( y=vector, @@ -41,6 +54,9 @@ def plotBoxPlot(vector, name_boxplot): data = [trace0] plotly.offline.plot(data, filename=name_boxplot) +""" +Function to plot a range of boxplots; the params are a vector of vectors and a name for the boxplot. +""" def plotMultBoxPlot(vectors, name_boxplot): data = [] for vector in vectors: @@ -52,6 +68,10 @@ def plotMultBoxPlot(vectors, name_boxplot): data.append(trace) plotly.offline.plot(data, filename=name_boxplot) +""" +Function to plot a bar chart; the params are a vector with X positions, a vector with Y positions +and a name for the bar chart. +""" def plorBarChart(vector_x, vector_y, nameBarChart): data = [go.Bar( x=vector_x, @@ -59,7 +79,10 @@ def plorBarChart(vector_x, vector_y, nameBarChart): )] plotly.offline.plot(data, filename=nameBarChart) - +""" +Function to plot a range of bar charts; the params are a vector with the name of each bar chart, +a vector with X positions, vectors with Y positions and a name for the bar chart. +""" def plotMultBarsChart(setName, vector_x, vectors_y, nameBarChart): data = [] i = 0 @@ -77,6 +100,10 @@ def plotMultBarsChart(setName, vector_x, vectors_y, nameBarChart): fig = go.Figure(data=data, layout=layout) plotly.offline.plot(fig, filename=nameBarChart) +""" +Function to plot a scatter plot chart; the params are a vector with X positions, +a vector with Y positions and a name for the scatter plot chart. 
+""" def plorScatterChart(vector_x, vector_y, nameBarChart): data = [go.Scatter( x=vector_x, @@ -85,6 +112,10 @@ def plorScatterChart(vector_x, vector_y, nameBarChart): )] plotly.offline.plot(data, filename=nameBarChart) +""" +Function to plot a range of scatter plot charts; the params are the set of names (setName), a vector with X positions, +vectors with Y positions and a name for the scatter plot chart. +""" def plotMultScatterChart(setName, vector_x, vectors_y, nameBarChart): data = [] i = 0 @@ -99,6 +130,9 @@ def plotMultScatterChart(setName, vector_x, vectors_y, nameBarChart): data.append(trace) plotly.offline.plot(data, filename=nameBarChart) +""" +Function to plot the most popular licenses in a bar chart +""" def plotMostPopularLicenses(keys, values, chartName): trace = go.Bar( name=chartName, @@ -108,6 +142,9 @@ def plotMostPopularLicenses(keys, values, chartName): data = [trace] plotly.offline.plot(data, filename=chartName) +""" +Function to plot the package history using a multi-series scatter plot +""" def plotPackageHistory(package, chartName): historyVersions = package.getHistory() listLocalRegularityRate = [] @@ -132,6 +169,9 @@ def plotNumberDependenciesBetweenPackages(ecosystemDataManager): lenVersionsDependencies.append(lenVersionDependencies) return lenVersionsDependencies +""" +Function to plot package history by versions; the output plot is a scatter plot. +""" def popularVersionHistory(package, chartName): versionsOcurrences = [] localRegularityRate = []