Skip to content

Commit

Permalink
Merge pull request #421 from idpaterson/pull-requests/indented-single…
Browse files Browse the repository at this point in the history
…-line-comment-grouping

Grouping for indented single line comments
  • Loading branch information
tomaz committed Jan 9, 2014
2 parents 63e6e97 + 2eb41ab commit b02fe29
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 2 deletions.
49 changes: 47 additions & 2 deletions Parsing/GBTokenizer.m
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ - (NSString *)commentValueFromString:(NSString *)value isMultiline:(BOOL)multili
- (NSString *)lineByPreprocessingHeaderDocDirectives:(NSString *)line;
- (NSArray *)linesByReorderingHeaderDocDirectives:(NSArray *)lines;
- (NSArray *)allTokensFromTokenizer:(PKTokenizer *)tokenizer;
- (NSUInteger)offsetOfLineContainingOffset:(NSUInteger)offset;
- (NSInteger)indentationAtOffset:(NSUInteger)offset;
@property (retain) NSString *filename;
@property (retain) NSString *input;
@property (retain) NSArray *tokens;
Expand Down Expand Up @@ -182,17 +184,22 @@ - (BOOL)consumeComments {

PKToken *startingPreviousToken = nil;
PKToken *startingLastToken = nil;
NSUInteger previousSingleLineEndOffset = 0;
NSUInteger previousSingleLineEndOffset = NSNotFound;
NSInteger previousSingleLineIndentation = -1;
while (![self eof] && [[self currentToken] isComment]) {
PKToken *token = [self currentToken];
NSString *value = nil;

// Match single line comments. Note that we can simplify the code with assumption that there's only one single line comment per match. If regex finds more (should never happen though), we simply combine them together. Then we check if the comment is a continuation of previous single liner by testing the string offset. If so we group the values together, otherwise we create a new single line comment. Finally we remember current comment offset to allow grouping of next single line comment. CAUTION: this algorithm won't group comments unless they start at the beginning of the line!
// Match single line comments. Note that we can simplify the code with assumption that there's only one single line comment per match. If regex finds more (should never happen though), we simply combine them together. Then we check if the comment is a continuation of previous single liner by testing the string offset and indentation. If so we group the values together, otherwise we create a new single line comment. Finally we remember current comment offset to allow grouping of next single line comment.
NSArray *singleLiners = [[token stringValue] componentsMatchedByRegex:self.singleLineCommentRegex capture:1];
if ([singleLiners count] > 0) {
value = [NSString string];
for (NSString *match in singleLiners) value = [value stringByAppendingString:match];
NSInteger tokenIndentation = [self indentationAtOffset:[token offset]];
BOOL isContinuingPreviousSingleLiner = ([token offset] == previousSingleLineEndOffset + 1);
if (!isContinuingPreviousSingleLiner && previousSingleLineIndentation > 0 && tokenIndentation == previousSingleLineIndentation) {
isContinuingPreviousSingleLiner = ([token offset] == previousSingleLineEndOffset + previousSingleLineIndentation + 1);
}
if (isContinuingPreviousSingleLiner) {
[self.lastCommentBuilder appendString:@"\n"];
} else {
Expand All @@ -204,6 +211,7 @@ - (BOOL)consumeComments {
startingLastToken = token;
}
previousSingleLineEndOffset = [token offset] + [[token stringValue] length];
previousSingleLineIndentation = tokenIndentation;
}

// Match multiple line comments and only process last (in reality we should only have one comment in each multiline comment token, but let's handle any strange cases gracefully).
Expand Down Expand Up @@ -403,6 +411,43 @@ - (NSArray *)allTokensFromTokenizer:(PKTokenizer *)tokenizer {
return result;
}

- (NSUInteger)offsetOfLineContainingOffset:(NSUInteger)offset {
    // Locates the beginning of the line that holds the character at `offset`
    // by scanning self.input backwards, over the range [0, offset), for the
    // closest newline character.
    NSCharacterSet *lineBreaks = [NSCharacterSet newlineCharacterSet];
    NSRange textBeforeOffset = NSMakeRange(0, offset);
    NSRange closestBreak = [self.input rangeOfCharacterFromSet:lineBreaks
                                                       options:NSBackwardsSearch
                                                         range:textBeforeOffset];

    // No newline before the offset: the offset lies on the first line.
    if (closestBreak.location == NSNotFound) {
        return 0;
    }

    // The line starts immediately after the newline character that was found.
    return closestBreak.location + 1;
}

- (NSInteger)indentationAtOffset:(NSUInteger)offset {
    // Reports how many characters separate the start of the line from
    // `offset`, provided every one of them belongs to
    // +[NSCharacterSet whitespaceCharacterSet]. If any other character sits
    // between the line start and the offset, the result is -1.
    NSUInteger lineStart = [self offsetOfLineContainingOffset:offset];
    NSUInteger prefixLength = offset - lineStart;

    // Nothing precedes the offset on its line: zero indentation.
    if (prefixLength == 0) {
        return 0;
    }

    // Look for the first character in the prefix that is NOT whitespace.
    NSCharacterSet *nonIndentCharacters = [[NSCharacterSet whitespaceCharacterSet] invertedSet];
    NSRange linePrefix = NSMakeRange(lineStart, prefixLength);
    NSRange firstNonIndent = [self.input rangeOfCharacterFromSet:nonIndentCharacters
                                                         options:0
                                                           range:linePrefix];

    // The prefix counts as indentation only when it is whitespace throughout.
    BOOL prefixIsPureWhitespace = (firstNonIndent.location == NSNotFound);
    return prefixIsPureWhitespace ? (NSInteger)prefixLength : -1;
}


#pragma mark Properties

@synthesize filename;
Expand Down
14 changes: 14 additions & 0 deletions Testing/GBTokenizerTesting.m
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,20 @@ - (void)testLastCommentString_shouldGroupSingleLineComments {
assertThat([tokenizer.lastComment stringValue], is(@"line1\nline2"));
}

- (void)testLastCommentString_shouldGroupSingleLineCommentsIfIndentationMatches {
    // setup: two single line comments that share the same leading indentation
    NSString *source = @" /// line1\n /// line2\n ONE";
    PKTokenizer *parser = [PKTokenizer tokenizerWithString:source];
    // execute
    GBTokenizer *tokenizer = [GBTokenizer tokenizerWithSource:parser filename:@"file"];
    // verify: both lines are expected in the last comment, joined by a newline
    NSString *lastCommentText = [tokenizer.lastComment stringValue];
    assertThat(lastCommentText, is(@"line1\nline2"));
}

- (void)testLastCommentString_shouldIgnoreSingleLineCommentsIfIndentationDoesNotMatch {
    // setup: the first comment is indented by four spaces and the second by
    // two, so the indentation of the two lines deliberately differs. Using
    // the same indentation on both lines would make this input identical to
    // the "indentation matches" case and the expectation below could not hold.
    NSString *source = @"    /// line1\n  /// line2\n   ONE";
    // execute
    GBTokenizer *tokenizer = [GBTokenizer tokenizerWithSource:[PKTokenizer tokenizerWithString:source] filename:@"file"];
    // verify: only the second comment is reported, the first is not grouped with it
    assertThat([tokenizer.lastComment stringValue], is(@"line2"));
}

- (void)testLastCommentString_shouldIgnoreSingleLineCommentsIfEmptyLineFoundInBetween {
// setup & execute
GBTokenizer *tokenizer = [GBTokenizer tokenizerWithSource:[PKTokenizer tokenizerWithString:@"/// line1\n\n/// line2\n ONE"] filename:@"file"];
Expand Down

0 comments on commit b02fe29

Please sign in to comment.