From a042e0ac0188692c833495a6e50323c30e14b41d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Michael=20R=C3=BCegg?= Date: Wed, 13 Apr 2016 15:17:05 +0200 Subject: [PATCH] Fixed parsing issues reported in #19 --- .../mibex/bitbucket/sonar/diff/GitDiffParser.scala | 10 +++++++--- .../sonar/diff/IssuesOnChangedLinesFilter.scala | 1 - .../resources/diffs/spaces-in-git-diff-path.txt | 6 ++++++ src/test/resources/diffs/u2028-char-issue.txt | 7 +++++++ .../bitbucket/sonar/diff/GitDiffParserSpec.scala | 13 ++++++++++--- 5 files changed, 30 insertions(+), 7 deletions(-) create mode 100644 src/test/resources/diffs/spaces-in-git-diff-path.txt create mode 100644 src/test/resources/diffs/u2028-char-issue.txt diff --git a/src/main/scala/ch/mibex/bitbucket/sonar/diff/GitDiffParser.scala b/src/main/scala/ch/mibex/bitbucket/sonar/diff/GitDiffParser.scala index c24c9f4..9e82f07 100644 --- a/src/main/scala/ch/mibex/bitbucket/sonar/diff/GitDiffParser.scala +++ b/src/main/scala/ch/mibex/bitbucket/sonar/diff/GitDiffParser.scala @@ -97,11 +97,11 @@ object GitDiffParser extends RegexParsers { case fh ~ th ~ optFm => Index(fh, th, optFm) } - def nl: Parser[String] = """(\r?\n)+""".r + def nl: Parser[String] = """[\n\r\f\u2028\u2029]+""".r // e.g., see http://www.fileformat.info/info/unicode/char/2028/index.htm def fileMode: Parser[Int] = """[0-7]{6}""".r ^^ { _.toInt } - def filePath: Parser[String] = """[\S]+""".r + def filePath: Parser[String] = """.+?(?=(\sb/)|(\r?\n))""".r def similarity: Parser[Int] = """\d{1,3}""".r ^^ { _.toInt } @@ -153,10 +153,14 @@ object GitDiffParser extends RegexParsers { def num: Parser[Int] = """\d+""".r ^^ { _.toInt } def parse(diff: String): Either[ParsingFailure, List[Diff]] = { - parseAll(allDiffs, diff) match { + parseAll(allDiffs, stripNelCharacters(diff)) match { case Success(s, _) => Right(s) case NoSuccess(msg, _) => Left(ParsingFailure(msg)) } } + // a NEL character can occur inside a normal text line and would be interpreted as a NL + // this can cause problems in diff lines and should therefore be ignored + private def stripNelCharacters(diff: String) = diff.replaceAll("\u0085", "") + } \ No newline at end of file diff --git a/src/main/scala/ch/mibex/bitbucket/sonar/diff/IssuesOnChangedLinesFilter.scala b/src/main/scala/ch/mibex/bitbucket/sonar/diff/IssuesOnChangedLinesFilter.scala index 70ebf2e..713efd2 100644 --- a/src/main/scala/ch/mibex/bitbucket/sonar/diff/IssuesOnChangedLinesFilter.scala +++ b/src/main/scala/ch/mibex/bitbucket/sonar/diff/IssuesOnChangedLinesFilter.scala @@ -21,7 +21,6 @@ class IssuesOnChangedLinesFilter(bitbucketClient: BitbucketClient, val issuesOnChangedLines = newIssues filter { i => val lineNr = Option(i.line()).flatMap(l => Option(l.toInt)).getOrElse(0) - inputFileCache.resolveRepoRelativePath(i.componentKey()) match { case Some(filePath) => val isIssueOnChangedLines = (diff: Diff) => diff match { diff --git a/src/test/resources/diffs/spaces-in-git-diff-path.txt b/src/test/resources/diffs/spaces-in-git-diff-path.txt new file mode 100644 index 0000000..4fb36f6 --- /dev/null +++ b/src/test/resources/diffs/spaces-in-git-diff-path.txt @@ -0,0 +1,6 @@ +diff --git a/skin/frontend/adc/default/images/reimbursement/prescription image_Audi BKK.jpg b/skin/frontend/adc/default/images/reimbursement/prescription image_Audi BKK.jpg +new file mode 100644 +index 0000000000000000000000000000000000000000..45e8602886f325a506424ed2c0444f2150c79a8b +GIT binary patch +literal 82397 +zcmeFa1yohr`Zv50kQ4*~K|#8^L4*xThjf<+h=71dm)pXi8_6vtx#_MgB3;r-cX!8* diff --git a/src/test/resources/diffs/u2028-char-issue.txt b/src/test/resources/diffs/u2028-char-issue.txt new file mode 100644 index 0000000..11f4b13 --- /dev/null +++ b/src/test/resources/diffs/u2028-char-issue.txt @@ -0,0 +1,7 @@ +diff --git a/app/code/local/Adc/Advisa/data/upgrade-1.0.12-1.0.13/fr_FR/terms-and-conditions.html b/app/code/local/Adc/Advisa/data/upgrade-1.0.12-1.0.13/fr_FR/terms-and-conditions.html +new file mode 100644 +index 0000000..8b10dbd +--- /dev/null ++++ b/app/code/local/Adc/Advisa/data/upgrade-1.0.12-1.0.13/fr_FR/terms-and-conditions.html +@@ -0,0 +1,259 @@ ++t 
 ABBOTT diff --git a/src/test/scala/ch/mibex/bitbucket/sonar/diff/GitDiffParserSpec.scala b/src/test/scala/ch/mibex/bitbucket/sonar/diff/GitDiffParserSpec.scala index c7c7bce..27be541 100644 --- a/src/test/scala/ch/mibex/bitbucket/sonar/diff/GitDiffParserSpec.scala +++ b/src/test/scala/ch/mibex/bitbucket/sonar/diff/GitDiffParserSpec.scala @@ -11,7 +11,7 @@ class GitDiffParserSpec extends Specification with ParserMatchers with StringMat import GitDiffParser._ private def readFile(path: String) = - scala.io.Source.fromInputStream(getClass.getResourceAsStream(path)).mkString + scala.io.Source.fromInputStream(getClass.getResourceAsStream(path)).mkString.replaceAll("\u0085", "") "diff headers mode" should { @@ -51,7 +51,6 @@ class GitDiffParserSpec extends Specification with ParserMatchers with StringMat "diff headers copy" should { "parse file path" in { - filePath must succeedOn("a/b/c.txt").withResult("a/b/c.txt") filePath must failOn("") } @@ -577,7 +576,11 @@ class GitDiffParserSpec extends Specification with ParserMatchers with StringMat "parse diff with u0085 new line character" in { allDiffs must succeedOn(readFile("/diffs/u0085-char-issue.txt")) - }.pendingUntilFixed + } + + "parse diff with another u2028 new line character" in { + allDiffs must succeedOn(readFile("/diffs/u2028-char-issue.txt")) + } "Github issue #8" in { allDiffs must succeedOn(readFile("/diffs/github#8.txt")) @@ -607,6 +610,10 @@ class GitDiffParserSpec extends Specification with ParserMatchers with StringMat allDiffs must succeedOn(readFile("/diffs/diff_pr_153_ko.diff.txt")) } + "parse spaces in the git diff file path" in { + allDiffs must succeedOn(readFile("/diffs/spaces-in-git-diff-path.txt")) + } + "Github issue #8 failing diff" in { allDiffs must succeedOn(readFile("/diffs/failing-diff.txt")).withResult( List(