diff --git a/api/consent.go b/api/consent.go new file mode 100644 index 0000000..3f8b7f9 --- /dev/null +++ b/api/consent.go @@ -0,0 +1,61 @@ +package api + +import ( + "time" + + "github.com/prebid/go-gdpr/consentconstants" +) + +// VendorConsents is a GDPR Vendor Consent string, as defined by IAB Europe. For technical details, +// see https://github.com/InteractiveAdvertisingBureau/GDPR-Transparency-and-Consent-Framework/blob/master/Consent%20string%20and%20vendor%20list%20formats%20v1.1%20Final.md#vendor-consent-string-format- +type VendorConsents interface { + // The version of the Consent string. + Version() uint8 + + // The time that the consent string was first created + Created() time.Time + + // The time that the consent string was last updated + LastUpdated() time.Time + + // The ID of the CMP used to update the consent string. + CmpID() uint16 + + // The version of the CMP used to update the consent string + CmpVersion() uint16 + + // The number of the CMP screen where consent was given + ConsentScreen() uint8 + + // The two-letter ISO639-1 language code used by the CMP to ask for consent, in uppercase. + ConsentLanguage() string + + // The VendorListVersion which is needed to interpret this consent string. + // + // The IAB is hosting these on their webpage. For example, version 2 of the + // Vendor List can be found at https://vendorlist.consensu.org/v-2/vendorlist.json + // + // For other versions, just replace the "v-*" path with the value returned here. + // The latest version can always be found at https://vendorlist.consensu.org/vendorlist.json + VendorListVersion() uint16 + + // MaxVendorID describes how many vendors are encoded into the string. + // This is the upper bound (inclusive) on valid inputs for VendorConsent(id). + MaxVendorID() uint16 + + // Determine if the user has consented to use data for the given Purpose.
+ // + // If the purpose is converted from an int > 24, the return value is undefined because + // the consent string doesn't have room for more purposes than that. + PurposeAllowed(id consentconstants.Purpose) bool + + // Determine if a given vendor has consent to collect or receive user info. + // + // This function's behavior is undefined for "invalid" IDs. + // IDs with value < 1 or value > MaxVendorID() are definitely invalid, but IDs within that range + // may still be invalid, depending on the Vendor List. + // + // It is the caller's responsibility to get the right Vendor List version for the semantics of the ID. + // For more information, see VendorListVersion(). + VendorConsent(id uint16) bool +} diff --git a/api/vendorlist.go b/api/vendorlist.go new file mode 100644 index 0000000..17c2527 --- /dev/null +++ b/api/vendorlist.go @@ -0,0 +1,31 @@ +package api + +import "github.com/prebid/go-gdpr/consentconstants" + +// VendorList is an interface used to fetch information about an IAB Global Vendor list. +// For the latest version, see: https://vendorlist.consensu.org/vendorlist.json +type VendorList interface { + // Version returns the version of the vendor list which this is. + // + // If the input was malformed, this will return 0. + Version() uint16 + + // Vendor returns info about the vendor with the given ID. + // This returns nil if that vendor isn't in this list, or the input was malformed somehow. + // + // If callers need to query multiple Purpose or LegitimateInterest statuses from the same vendor, + // they should call this function once and then reuse the object it returns for future queries. + Vendor(vendorID uint16) Vendor +} + +// Vendor describes which purposes a given vendor claims to use data for, in this vendor list. 
+type Vendor interface { + // Purpose returns true if this vendor claims to use data for the given purpose, or false otherwise + Purpose(purposeID consentconstants.Purpose) bool + + // LegitimateInterest returns true if this vendor claims a "Legitimate Interest" to + // use data for the given purpose. + // + // For an explanation of legitimate interest, see https://www.gdpreu.org/the-regulation/key-concepts/legitimate-interest/ + LegitimateInterest(purposeID consentconstants.Purpose) bool +} diff --git a/consentconstants/purposes.go b/consentconstants/purposes.go index 4bbdf72..06c47bc 100644 --- a/consentconstants/purposes.go +++ b/consentconstants/purposes.go @@ -5,6 +5,7 @@ package consentconstants // 2. PurposesAllowed of the Consent string: https://github.com/InteractiveAdvertisingBureau/GDPR-Transparency-and-Consent-Framework/blob/master/Consent%20string%20and%20vendor%20list%20formats%20v1.1%20Final.md#vendor-consent-string-format- type Purpose uint8 +// TCF 1 Purposes: const ( // InfoStorageAccess includes the storage of information, or access to information that is already stored, // on your device such as advertising identifiers, device identifiers, cookies, and similar technologies. diff --git a/consentconstants/tcf2/purposes.go b/consentconstants/tcf2/purposes.go new file mode 100644 index 0000000..e35b629 --- /dev/null +++ b/consentconstants/tcf2/purposes.go @@ -0,0 +1,91 @@ +package consentconstants + +import base "github.com/prebid/go-gdpr/consentconstants" + +// TCF 2.0 Purposes: +const ( + // InfoStorageAccess includes the storage of information, or access to information that is already stored, + // on your device such as advertising identifiers, device identifiers, cookies, and similar technologies. + InfoStorageAccess base.Purpose = 1 + + // Cookies, device identifiers, or other information can be stored or accessed on your device for the purposes presented to you.
+ // Vendors can: + // * Store and access information on the device such as cookies and device identifiers presented to a user. + // Reuse InfoStorageAccess above + + // Ads can be shown to you based on the content you are viewing, the app you are using, your approximate location, or your device type. + // To do basic ad selection vendors can: + // * Use real-time information about the context in which the ad will be shown, to show the ad, including information about the content and + // the device, such as: device type and capabilities, user agent, URL, IP address + // * Use a user's non-precise geolocation data + // * Control the frequency of ads shown to a user, and sequence the order in which ads are shown to a user. + // * Prevent an ad from serving in an unsuitable editorial (brand-unsafe) context + // Vendors cannot: + // * Create a personalised ads profile using this information for the selection of future ads. + // * N.B. Non-precise means only an approximate location involving at least a radius of 500 meters is permitted. + BasicAdserving base.Purpose = 2 + + // A profile can be built about you and your interests to show you personalised ads that are relevant to you. + // To create a personalised ads profile vendors can: + // * Collect information about a user, including a user's activity, interests, demographic information, or location, to create or edit a user profile for use in personalised advertising. + // * Combine this information with other information previously collected, including from across websites and apps, to create or edit a user profile for use in personalised advertising. + PersonalizationProfile base.Purpose = 3 + + // Personalised ads can be shown to you based on a profile about you. + // To select personalised ads vendors can: + // * Select personalised ads based on a user profile or other historical user data, including a user's prior activity, interests, visits to sites or apps, location, or demographic information.
+ PersonalizationSelection base.Purpose = 4 + + // A profile can be built about you and your interests to show you personalised content that is relevant to you. + // To create a personalised content profile vendors can: + // * Collect information about a user, including a user's activity, interests, visits to sites or apps, demographic information, or location, to create or edit a user profile for personalising content. + // * Combine this information with other information previously collected, including from across websites and apps, to create or edit a user profile for use in personalising content. + ContentProfile base.Purpose = 5 + + // Personalised content can be shown to you based on a profile about you. + // To select personalised content vendors can: + // * Select personalised content based on a user profile or other historical user data, including a user's prior activity, interests, visits to sites or apps, location, or demographic information. + ContentSelection base.Purpose = 6 + + // The performance and effectiveness of ads that you see or interact with can be measured.
+ // To measure ad performance vendors can: + // * Measure whether and how ads were delivered to and interacted with by a user + // * Provide reporting about ads including their effectiveness and performance + // * Provide reporting about users who interacted with ads using data observed during the course of the user's interaction with that ad + // * Provide reporting to publishers about the ads displayed on their property + // * Measure whether an ad is serving in a suitable editorial environment (brand-safe) context + // * Determine the percentage of the ad that had the opportunity to be seen and the duration of that opportunity + // * Combine this information with other information previously collected, including from across websites and apps + // Vendors cannot: + // *Apply panel- or similarly-derived audience insights data to ad measurement data without a Legal Basis to apply market research to generate audience insights (Purpose 9) + AdPerformance base.Purpose = 7 + + // The performance and effectiveness of content that you see or interact with can be measured. + // To measure content performance vendors can: + // * Measure and report on how content was delivered to and interacted with by users. + // * Provide reporting, using directly measurable or known information, about users who interacted with the content + // * Combine this information with other information previously collected, including from across websites and apps. + // Vendors cannot: + // * Measure whether and how ads (including native ads) were delivered to and interacted with by a user. + // * Apply panel- or similarly derived audience insights data to ad measurement data without a Legal Basis to apply market research to generate audience insights (Purpose 9) + ContentPerformance base.Purpose = 8 + + // Market research can be used to learn more about the audiences who visit sites/apps and view ads. 
+ // To apply market research to generate audience insights vendors can: + // * Provide aggregate reporting to advertisers or their representatives about the audiences reached by their ads, through panel-based and similarly derived insights. + // * Provide aggregate reporting to publishers about the audiences that were served or interacted with content and/or ads on their property by applying panel-based and similarly derived insights. + // * Associate offline data with an online user for the purposes of market research to generate audience insights if vendors have declared to match and combine offline data sources (Feature 1) + // * Combine this information with other information previously collected including from across websites and apps. + // Vendors cannot: + // * Measure the performance and effectiveness of ads that a specific user was served or interacted with, without a Legal Basis to measure ad performance. + // * Measure which content a specific user was served and how they interacted with it, without a Legal Basis to measure content performance. 
+ MarketResearch base.Purpose = 9 + + // Your data can be used to improve existing systems and software, and to develop new products + // To develop new products and improve products vendors can: + // * Use information to improve their existing products with new features and to develop new products + // * Create new models and algorithms through machine learning + // Vendors cannot: + // * Conduct any other data processing operation allowed under a different purpose under this purpose + DevelopImprove base.Purpose = 10 +) diff --git a/vendorconsent/consent.go b/vendorconsent/consent.go index d343749..c1828b2 100644 --- a/vendorconsent/consent.go +++ b/vendorconsent/consent.go @@ -2,93 +2,33 @@ package vendorconsent import ( "encoding/base64" - "time" + "strings" - "github.com/prebid/go-gdpr/consentconstants" + "github.com/prebid/go-gdpr/api" + tcf1 "github.com/prebid/go-gdpr/vendorconsent/tcf1" + tcf2 "github.com/prebid/go-gdpr/vendorconsent/tcf2" ) -// VendorConsents is a GDPR Vendor Consent string, as defined by IAB Europe. For technical details, -// see https://github.com/InteractiveAdvertisingBureau/GDPR-Transparency-and-Consent-Framework/blob/master/Consent%20string%20and%20vendor%20list%20formats%20v1.1%20Final.md#vendor-consent-string-format- -type VendorConsents interface { - // The version of the Consent string. - Version() uint8 - - // The time that the consent string was first created - Created() time.Time - - // The time that the consent string was last updated - LastUpdated() time.Time - - // The ID of the CMP used to update the consent string. - CmpID() uint16 - - // The version of the CMP used to update the consent string - CmpVersion() uint16 - - // The number of the CMP screen where consent was given - ConsentScreen() uint8 - - // The two-letter ISO639-1 language code used by the CMP to ask for consent, in uppercase. - ConsentLanguage() string - - // The VendorListVersion which is needed to interpret this consent string. 
- // - // The IAB is hosting these on their webpage. For example, version 2 of the - // Vendor List can be found at https://vendorlist.consensu.org/v-2/vendorlist.json - // - // For other versions, just replace the "v-*" path with the value returned here. - // The latest version can always be found at https://vendorlist.consensu.org/vendorlist.json - VendorListVersion() uint16 - - // MaxVendorID describes how many vendors are encoded into the string. - // This is the upper bound (inclusive) on valid inputs for HasConsent(id). - MaxVendorID() uint16 - - // Determine if the user has consented to use data for the given Purpose. - // - // If the purpose is converted from an int > 24, the return value is undefined because - // the consent string doesn't have room for more purposes than that. - PurposeAllowed(id consentconstants.Purpose) bool - - // Determine if a given vendor has consent to collect or receive user info. - // - // This function's behavior is undefined for "invalid" IDs. - // IDs with value < 1 or value > MaxVendorID() are definitely invalid, but IDs within that range - // may still be invalid, depending on the Vendor List. - // - // It is the caller's responsibility to get the right Vendor List version for the semantics of the ID. - // For more information, see VendorListVersion(). - VendorConsent(id uint16) bool -} - // ParseString parses a Raw (unpadded) base64 URL encoded string. -func ParseString(consent string) (VendorConsents, error) { - decoded, err := base64.RawURLEncoding.DecodeString(consent) +func ParseString(consent string) (api.VendorConsents, error) { + pieces := strings.Split(consent, ".") + decoded, err := base64.RawURLEncoding.DecodeString(pieces[0]) if err != nil { return nil, err } - return Parse(decoded) -} - -// Parse the vendor consent data from the string. This string should *not* be encoded (by base64 or any other encoding). 
-// If the data is malformed and cannot be interpreted as a vendor consent string, this will return an error. -func Parse(data []byte) (VendorConsents, error) { - metadata, err := parseMetadata(data) - if err != nil { - return nil, err + version := uint8(decoded[0] >> 2) + if version == 2 { + return tcf2.Parse(decoded) } + return tcf1.Parse(decoded) +} - // Bit 172 determines whether or not the consent string encodes Vendor data in a RangeSection or BitField. - if isSet(data, 172) { - return parseRangeSection(metadata) - } +// Backwards compatibility - return parseBitField(metadata) +type VendorConsents interface { + api.VendorConsents } -// Returns true if the bitIndex'th bit in data is a 1, and false if it's a 0. -func isSet(data []byte, bitIndex uint) bool { - byteIndex := bitIndex / 8 - bitOffset := bitIndex % 8 - return byteToBool(data[byteIndex] & (0x80 >> bitOffset)) +func Parse(data []byte) (api.VendorConsents, error) { + return tcf1.Parse(data) } diff --git a/vendorconsent/consent20_test.go b/vendorconsent/consent20_test.go new file mode 100644 index 0000000..6da364f --- /dev/null +++ b/vendorconsent/consent20_test.go @@ -0,0 +1,85 @@ +package vendorconsent + +import ( + "encoding/base64" + "testing" + + tcf2 "github.com/prebid/go-gdpr/vendorconsent/tcf2" +) + +// This checks error conditions to verify that we get errors back on malformed strings +func TestInvalidConsentStrings20(t *testing.T) { + // All strings here were encoded using https://cryptii.com/binary-to-base64 from binary to URL-encoded base64 string. + // Beware: this tool only makes sense if your binary strings use full bytes (multiples of 8 digits). 
+ // + // For future tests, a "baseline" of valid binary using a BitField, segmented by different vendor consent string semantics, is: + // + // 000010 => Version + // 001110001101011100100010100000101110 => Created date + // 001110001101011100100011000110001010 => LastUpdated date + // 000000000011 => CmpId + // 000000000010 => CmpVersion + // 000111 => ConsentScreen + // 000100001101 => ConsentLanguage + // 000000001110 => VendorListVersion + // 000010 => TcfPolicyVersion + // 0 => IsServiceSpecific + // 0 => UseNonStandardStacks + // 100000000000 => SpecialFeatureOptins + // 001011010010110101101011 => PurposesConsent + // 111111111100000000000000 => PurposesLITransparency + // 0 => PurposeOneTreatment + // 010100010010 => PublisherCC (US if I did the math right) + // 0000000000000011 => MaxVendorID <= Vendor Consent + // 0 => EncodingType + // 000 => BitFieldSection + // 0000000000000011 => MaxVendorID <= Legitimate Interest + // 0 => EncodingType + // 000 => BitFieldSection + // + // 0000100011100011010111001000101000001011100011100011010111001000110001100010100000000000110000000000100001110001000011010000000011100000100010000000000000101101001011010110101111111111110000000000000000101000100100000000000000011000000000000000000110000 + // CONciguONcjGKADACHENAOCIAC0ta__AACiQABgAAYA + // + // These "bad requests" can be made by tweaking those values to get various errors. + // Bad metadata + assertInvalid20(t, "CONciguONcjGKADACHENAOCIAC0ta__AACiQAA", "vendor consent strings are at least 29 bytes long.
This one was 28") + assertInvalid20(t, "AONciguONcjGKADACHENAOCIAC0ta__AACiQABgAAYA", "the consent string encoded a Version of 0, but this value must be greater than or equal to 1") + assertInvalid20(t, "CONciguONcjGKADACHENAOCIAC0ta__AACiQAAAAAMA", "the consent string encoded a MaxVendorID of 0, but this value must be greater than or equal to 1") + assertInvalid20(t, "CONciguONcjGKADACHENAACIAC0ta__AACiQABgAAYA", "the consent string encoded a VendorListVersion of 0, but this value must be greater than or equal to 1") + + // Bad BitFields + assertInvalid20(t, "CONciguONcjGKADACHENAOCIAC0ta__AACiQAeAA", "a BitField for 60 vendors requires a consent string of 36 bytes. This consent string had 30") + + // Bad RangeSections + assertInvalid20(t, "CONciguONcjGKADACHENAOCIAC0ta__AACiQABwA", "vendor consent strings using RangeSections require at least 31 bytes. Got 30") // This encodes 184 bits + assertInvalid20(t, "CONciguONcjGKADACHENAOCIAC0ta__AACiQABwAQQ", "rangeSection expected a 16-bit vendorID to start at bit 243, but the consent string was only 31 bytes long") // 1 single vendor, too few bits + assertInvalid20(t, "CONciguONcjGKADACHENAOCIAC0ta__AACiQABwAYQAC", "rangeSection expected a 16-bit vendorID to start at bit 259, but the consent string was only 33 bytes long") // 1 vendor range, too few bits + assertInvalid20(t, "CONciguONcjGKADACHENAOCIAC0ta__AACiQABwAgABA", "rangeSection expected a 16-bit vendorID to start at bit 260, but the consent string was only 33 bytes long") // 2 single vendors, too few bits + assertInvalid20(t, "CONciguONcjGKADACHENAOCIAC0ta__AACiQABwAgAAAAA", "bit 242 range entry excludes vendor 0, but only vendors [1, 3] are valid") + assertInvalid20(t, "CONciguONcjGKADACHENAOCIAC0ta__AACiQABwAgACAAA", "bit 242 range entry excludes vendor 4, but only vendors [1, 3] are valid") + assertInvalid20(t, "CONciguONcjGKADACHENAOCIAC0ta__AACiQABwAgABAAAA", "bit 259 range entry excludes vendor 0, but only vendors [1, 3] are valid") + assertInvalid20(t, 
"CONciguONcjGKADACHENAOCIAC0ta__AACiQABwAgABAAEA", "bit 259 range entry excludes vendor 4, but only vendors [1, 3] are valid") + assertInvalid20(t, "CONciguONcjGKADACHENAOCIAC0ta__AACiQABwAoABAACA", "bit 242 range entry excludes vendors [2, 1]. The start should be less than the end") +} + +func TestParseValidString20(t *testing.T) { + parsed, err := ParseString("CONciguONcjGKADACHENAOCIAC0ta__AACiQABgAAYA") + assertNilError(t, err) + assertUInt16sEqual(t, 14, parsed.VendorListVersion()) +} + +func assertInvalid20(t *testing.T, urlEncodedString string, expectError string) { + t.Helper() + data, err := base64.RawURLEncoding.DecodeString(urlEncodedString) + assertNilError(t, err) + assertInvalidBytes20(t, data, expectError) +} + +func assertInvalidBytes20(t *testing.T, data []byte, expectError string) { + t.Helper() + if consent, err := tcf2.Parse(data); err == nil { + t.Errorf("base64 URL-encoded string %s was considered valid, but shouldn't be. MaxVendorID: %d. len(data): %d", base64.RawURLEncoding.EncodeToString(data), consent.MaxVendorID(), len(data)) + } else if err.Error() != expectError { + t.Errorf(`error messages did not match. Expected "%s", got "%s": %v`, expectError, err.Error(), err) + } +} diff --git a/vendorconsent/consent_test.go b/vendorconsent/consent_test.go index c113804..7f979e6 100644 --- a/vendorconsent/consent_test.go +++ b/vendorconsent/consent_test.go @@ -3,6 +3,8 @@ package vendorconsent import ( "encoding/base64" "testing" + + tcf1 "github.com/prebid/go-gdpr/vendorconsent/tcf1" ) func TestIsSet(t *testing.T) { @@ -91,7 +93,7 @@ func assertInvalid(t *testing.T, urlEncodedString string, expectError string) { func assertInvalidBytes(t *testing.T, data []byte, expectError string) { t.Helper() - if consent, err := Parse(data); err == nil { + if consent, err := tcf1.Parse(data); err == nil { t.Errorf("base64 URL-encoded string %s was considered valid, but shouldn't be. MaxVendorID: %d. 
len(data): %d", base64.RawURLEncoding.EncodeToString(data), consent.MaxVendorID(), len(data)) } else if err.Error() != expectError { t.Errorf(`error messages did not match. Expected "%s", got "%s": %v`, expectError, err.Error(), err) @@ -147,3 +149,24 @@ func buildMap(keys ...uint) map[uint]struct{} { } return m } + +func assertNilError(t *testing.T, err error) { + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } +} + +// Returns true if the bitIndex'th bit in data is a 1, and false if it's a 0. +func isSet(data []byte, bitIndex uint) bool { + byteIndex := bitIndex / 8 + bitOffset := bitIndex % 8 + return byteToBool(data[byteIndex] & (0x80 >> bitOffset)) +} + +// byteToBool returns false if val is 0, and true otherwise +func byteToBool(val byte) bool { + if val == 0 { + return false + } + return true +} diff --git a/vendorconsent/bitfield.go b/vendorconsent/tcf1/bitfield.go similarity index 96% rename from vendorconsent/bitfield.go rename to vendorconsent/tcf1/bitfield.go index 39e185a..269a771 100644 --- a/vendorconsent/bitfield.go +++ b/vendorconsent/tcf1/bitfield.go @@ -1,6 +1,8 @@ package vendorconsent -import "fmt" +import ( + "fmt" +) func parseBitField(data consentMetadata) (*consentBitField, error) { vendorBitsRequired := data.MaxVendorID() @@ -51,8 +53,5 @@ func (f *consentBitField) VendorConsent(id uint16) bool { // byteToBool returns false if val is 0, and true otherwise func byteToBool(val byte) bool { - if val == 0 { - return false - } - return true + return val != 0 } diff --git a/vendorconsent/bitfield_test.go b/vendorconsent/tcf1/bitfield_test.go similarity index 100% rename from vendorconsent/bitfield_test.go rename to vendorconsent/tcf1/bitfield_test.go diff --git a/vendorconsent/tcf1/consent.go b/vendorconsent/tcf1/consent.go new file mode 100644 index 0000000..dcf50b2 --- /dev/null +++ b/vendorconsent/tcf1/consent.go @@ -0,0 +1,19 @@ +package vendorconsent + +import "github.com/prebid/go-gdpr/api" + +// Parse the vendor consent data 
from the string. This string should *not* be encoded (by base64 or any other encoding). +// If the data is malformed and cannot be interpreted as a vendor consent string, this will return an error. +func Parse(data []byte) (api.VendorConsents, error) { + metadata, err := parseMetadata(data) + if err != nil { + return nil, err + } + + // Bit 172 determines whether or not the consent string encodes Vendor data in a RangeSection or BitField. + if isSet(data, 172) { + return parseRangeSection(metadata) + } + + return parseBitField(metadata) +} diff --git a/vendorconsent/metadata.go b/vendorconsent/tcf1/metadata.go similarity index 94% rename from vendorconsent/metadata.go rename to vendorconsent/tcf1/metadata.go index 527060e..e359d1c 100644 --- a/vendorconsent/metadata.go +++ b/vendorconsent/tcf1/metadata.go @@ -121,3 +121,10 @@ func (c consentMetadata) PurposeAllowed(id consentconstants.Purpose) bool { // so in the valid range, this won't even overflow a uint8. return isSet(c, uint(id)+131) } + +// Returns true if the bitIndex'th bit in data is a 1, and false if it's a 0. 
+func isSet(data []byte, bitIndex uint) bool { + byteIndex := bitIndex / 8 + bitOffset := bitIndex % 8 + return byteToBool(data[byteIndex] & (0x80 >> bitOffset)) +} diff --git a/vendorconsent/metadata_test.go b/vendorconsent/tcf1/metadata_test.go similarity index 94% rename from vendorconsent/metadata_test.go rename to vendorconsent/tcf1/metadata_test.go index b6b52b8..5f4eac2 100644 --- a/vendorconsent/metadata_test.go +++ b/vendorconsent/tcf1/metadata_test.go @@ -64,9 +64,3 @@ func TestLanguageExtremes(t *testing.T) { assertNilError(t, err) assertStringsEqual(t, "ZA", consent.ConsentLanguage()) } - -func assertNilError(t *testing.T, err error) { - if err != nil { - t.Fatalf("Unexpected error: %v", err) - } -} diff --git a/vendorconsent/rangesection.go b/vendorconsent/tcf1/rangesection.go similarity index 100% rename from vendorconsent/rangesection.go rename to vendorconsent/tcf1/rangesection.go diff --git a/vendorconsent/rangesection_test.go b/vendorconsent/tcf1/rangesection_test.go similarity index 100% rename from vendorconsent/rangesection_test.go rename to vendorconsent/tcf1/rangesection_test.go diff --git a/vendorconsent/tcf1/test_utils.go b/vendorconsent/tcf1/test_utils.go new file mode 100644 index 0000000..2466e6f --- /dev/null +++ b/vendorconsent/tcf1/test_utils.go @@ -0,0 +1,78 @@ +package vendorconsent + +import ( + "encoding/base64" + "testing" +) + +func assertInvalid(t *testing.T, urlEncodedString string, expectError string) { + t.Helper() + data, err := base64.RawURLEncoding.DecodeString(urlEncodedString) + assertNilError(t, err) + assertInvalidBytes(t, data, expectError) +} + +func assertInvalidBytes(t *testing.T, data []byte, expectError string) { + t.Helper() + if consent, err := Parse(data); err == nil { + t.Errorf("base64 URL-encoded string %s was considered valid, but shouldn't be. MaxVendorID: %d. 
len(data): %d", base64.RawURLEncoding.EncodeToString(data), consent.MaxVendorID(), len(data)) + } else if err.Error() != expectError { + t.Errorf(`error messages did not match. Expected "%s", got "%s": %v`, expectError, err.Error(), err) + } +} + +func decode(t *testing.T, encodedString string) []byte { + data, err := base64.RawURLEncoding.DecodeString(encodedString) + assertNilError(t, err) + return data +} + +func assertNilError(t *testing.T, err error) { + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } +} + +func assertStringsEqual(t *testing.T, expected string, actual string) { + t.Helper() + if actual != expected { + t.Errorf("Strings were not equal. Expected %s, actual %s", expected, actual) + } +} + +func assertUInt8sEqual(t *testing.T, expected uint8, actual uint8) { + t.Helper() + if actual != expected { + t.Errorf("Ints were not equal. Expected %d, actual %d", expected, actual) + } +} + +func assertUInt16sEqual(t *testing.T, expected uint16, actual uint16) { + t.Helper() + if actual != expected { + t.Errorf("Ints were not equal. Expected %d, actual %d", expected, actual) + } +} + +func assertIntsEqual(t *testing.T, expected int, actual int) { + t.Helper() + if actual != expected { + t.Errorf("Ints were not equal. Expected %d, actual %d", expected, actual) + } +} + +func assertBoolsEqual(t *testing.T, expected bool, actual bool) { + t.Helper() + if actual != expected { + t.Errorf("Bools were not equal. 
Expected %t, actual %t", expected, actual) + } +} + +func buildMap(keys ...uint) map[uint]struct{} { + var s struct{} + m := make(map[uint]struct{}, len(keys)) + for _, key := range keys { + m[key] = s + } + return m +} diff --git a/vendorconsent/tcf2/bitfield.go b/vendorconsent/tcf2/bitfield.go new file mode 100644 index 0000000..e2b5272 --- /dev/null +++ b/vendorconsent/tcf2/bitfield.go @@ -0,0 +1,57 @@ +package vendorconsent + +import ( + "fmt" +) + +func parseBitField(data consentMetadata) (*consentBitField, error) { + vendorBitsRequired := data.MaxVendorID() + + // BitFields start at bit 230, so only the last two bits of byte 28 (bits 230 and 231) are part of the bitfield. NOTE(review): the thresholds and offsets of 3 used below assume three bits; confirm against the bit layout. + // In this case "others" will never be used, and we don't risk an index-out-of-bounds by using it. + if vendorBitsRequired <= 3 { + return &consentBitField{ + consentMetadata: data, + firstTwo: data[28], + others: nil, + }, nil + } + + otherBytesRequired := (vendorBitsRequired - 3) / 8 + if (vendorBitsRequired-3)%8 > 0 { + otherBytesRequired = otherBytesRequired + 1 + } + dataLengthRequired := 28 + otherBytesRequired + if uint(len(data)) < uint(dataLengthRequired) { + return nil, fmt.Errorf("a BitField for %d vendors requires a consent string of %d bytes. This consent string had %d", vendorBitsRequired, dataLengthRequired, len(data)) + } + + return &consentBitField{ + consentMetadata: data, + firstTwo: data[28], + others: data[29:], + }, nil +} + +// A BitField has len(MaxVendorID()) entries, with one bit for every vendor in the range. +type consentBitField struct { + consentMetadata + firstTwo byte + others []byte +} + +func (f *consentBitField) VendorConsent(id uint16) bool { + if id < 1 || id > f.MaxVendorID() { + return false + } + // Careful here... vendor IDs start at index 1...
+ if id <= 3 { + return byteToBool(f.firstTwo & (0x04 >> id)) + } + return isSet(f.others, uint(id-3)) +} + +// byteToBool returns false if val is 0, and true otherwise +func byteToBool(val byte) bool { + return val != 0 +} diff --git a/vendorconsent/tcf2/bitfield_test.go b/vendorconsent/tcf2/bitfield_test.go new file mode 100644 index 0000000..36228f5 --- /dev/null +++ b/vendorconsent/tcf2/bitfield_test.go @@ -0,0 +1,33 @@ +package vendorconsent + +import ( + "testing" + + "github.com/prebid/go-gdpr/consentconstants" +) + +func TestBitField(t *testing.T) { + // String built using http://gdpr-demo.labs.quantcast.com/user-examples/cookie-workshop.html + // This sample includes a BitField. + consent, err := Parse(decode(t, "COwGVJOOwGVJOADACHENAOCAAO6as_-AAAhoAFNLAAoAAAA")) + assertNilError(t, err) + assertUInt8sEqual(t, 2, consent.Version()) + assertUInt16sEqual(t, 3, consent.CmpID()) + assertUInt16sEqual(t, 2, consent.CmpVersion()) + assertUInt8sEqual(t, 7, consent.ConsentScreen()) + assertStringsEqual(t, "EN", consent.ConsentLanguage()) + assertUInt16sEqual(t, 14, consent.VendorListVersion()) + assertUInt16sEqual(t, 10, consent.MaxVendorID()) + + purposesAllowed := buildMap(1, 2, 3, 5, 6, 7, 9, 12, 13, 15, 17, 19, 20, 23, 24) + for i := uint8(1); i <= 24; i++ { + _, ok := purposesAllowed[uint(i)] + assertBoolsEqual(t, ok, consent.PurposeAllowed(consentconstants.Purpose(i))) + } + + vendorsWithConsent := buildMap(1, 2, 4, 7, 9, 10) + for i := uint16(1); i <= consent.MaxVendorID(); i++ { + _, ok := vendorsWithConsent[uint(i)] + assertBoolsEqual(t, ok, consent.VendorConsent(i)) + } +} diff --git a/vendorconsent/tcf2/consent.go b/vendorconsent/tcf2/consent.go new file mode 100644 index 0000000..5f2cd47 --- /dev/null +++ b/vendorconsent/tcf2/consent.go @@ -0,0 +1,19 @@ +package vendorconsent + +import "github.com/prebid/go-gdpr/api" + +// Parse parses the TCF 2.0 vendor consent data from the string. This string should *not* be encoded (by base64 or any other encoding). 
+// If the data is malformed and cannot be interpreted as a vendor consent string, this will return an error. +func Parse(data []byte) (api.VendorConsents, error) { + metadata, err := parseMetadata(data) + if err != nil { + return nil, err + } + + // Bit 229 determines whether or not the consent string encodes Vendor data in a RangeSection or BitField. + if isSet(data, 229) { + return parseRangeSection(metadata) + } + + return parseBitField(metadata) +} diff --git a/vendorconsent/tcf2/metadata.go b/vendorconsent/tcf2/metadata.go new file mode 100644 index 0000000..e7e10ad --- /dev/null +++ b/vendorconsent/tcf2/metadata.go @@ -0,0 +1,132 @@ +package vendorconsent + +import ( + "encoding/binary" + "errors" + "fmt" + "time" + + "github.com/prebid/go-gdpr/consentconstants" +) + +// Parse the metadata from the consent string. +// This returns an error if the input is too short to answer questions about that data. +func parseMetadata(data []byte) (consentMetadata, error) { + if len(data) < 29 { + return nil, fmt.Errorf("vendor consent strings are at least 29 bytes long. This one was %d", len(data)) + } + metadata := consentMetadata(data) + if metadata.MaxVendorID() < 1 { + return nil, fmt.Errorf("the consent string encoded a MaxVendorID of %d, but this value must be greater than or equal to 1", metadata.MaxVendorID()) + } + if metadata.Version() < 1 { + return nil, fmt.Errorf("the consent string encoded a Version of %d, but this value must be greater than or equal to 1", metadata.Version()) + } + if metadata.VendorListVersion() == 0 { + return nil, errors.New("the consent string encoded a VendorListVersion of 0, but this value must be greater than or equal to 1") + + } + return consentMetadata(data), nil +} + +// consemtMetadata implements the parts of the VendorConsents interface which are common +// to BitFields and RangeSections. 
This relies on Parse to have done some validation already, +// to make sure that functions on it don't overflow the bounds of the byte array. +type consentMetadata []byte + +func (c consentMetadata) Version() uint8 { + // Stored in bits 0-5 + return uint8(c[0] >> 2) +} + +const ( + nanosPerDeci = 100000000 + decisPerOne = 10 +) + +func (c consentMetadata) Created() time.Time { + // Stored in bits 6-41.. which is [000000xx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xx000000] starting at the 1st byte + deciseconds := int64(binary.BigEndian.Uint64([]byte{ + 0x0, + 0x0, + 0x0, + (c[0]&0x3)<<2 | c[1]>>6, + c[1]<<2 | c[2]>>6, + c[2]<<2 | c[3]>>6, + c[3]<<2 | c[4]>>6, + c[4]<<2 | c[5]>>6, + })) + return time.Unix(deciseconds/decisPerOne, (deciseconds%decisPerOne)*nanosPerDeci) +} + +func (c consentMetadata) LastUpdated() time.Time { + // Stored in bits 42-77... which is [00xxxxxx xxxxxxxx xxxxxxxx xxxxxxxx xxxxxx00 ] starting at the 6th byte + deciseconds := int64(binary.BigEndian.Uint64([]byte{ + 0x0, + 0x0, + 0x0, + (c[5] >> 2) & 0x0f, + c[5]<<6 | c[6]>>2, + c[6]<<6 | c[7]>>2, + c[7]<<6 | c[8]>>2, + c[8]<<6 | c[9]>>2, + })) + return time.Unix(deciseconds/decisPerOne, (deciseconds%decisPerOne)*nanosPerDeci) +} + +func (c consentMetadata) CmpID() uint16 { + // Stored in bits 78-89... which is [000000xx xxxxxxxx xx000000] starting at the 10th byte + leftByte := ((c[9] & 0x03) << 2) | c[10]>>6 + rightByte := (c[10] << 2) | c[11]>>6 + return binary.BigEndian.Uint16([]byte{leftByte, rightByte}) +} + +func (c consentMetadata) CmpVersion() uint16 { + // Stored in bits 90-101.. which is [00xxxxxx xxxxxx00] starting at the 12th byte + leftByte := (c[11] >> 2) & 0x0f + rightByte := (c[11] << 6) | c[12]>>2 + return binary.BigEndian.Uint16([]byte{leftByte, rightByte}) +} + +func (c consentMetadata) ConsentScreen() uint8 { + // Stored in bits 102-107.. 
which is [000000xx xxxx0000] starting at the 13th byte + return uint8(((c[12] & 0x03) << 4) | c[13]>>4) +} + +func (c consentMetadata) ConsentLanguage() string { + // Stored in bits 108-119... which is [0000xxxx xxxxxxxx] starting at the 14th byte. + // Each letter is stored as 6 bits, with A=0 and Z=25 + leftChar := ((c[13] & 0x0f) << 2) | c[14]>>6 + rightChar := c[14] & 0x3f + return string([]byte{leftChar + 65, rightChar + 65}) // Unicode A-Z is 65-90 +} + +func (c consentMetadata) VendorListVersion() uint16 { + // The vendor list version is stored in bits 120 - 131 + rightByte := ((c[16] & 0xf0) >> 4) | ((c[15] & 0x0f) << 4) + leftByte := c[15] >> 4 + return binary.BigEndian.Uint16([]byte{leftByte, rightByte}) +} + +func (c consentMetadata) MaxVendorID() uint16 { + // The max vendor ID is stored in bits 213 - 228 [00000xxx xxxxxxxx xxxxx000] + leftByte := byte((c[26]&0x07)<<5 + (c[27]&0xf8)>>3) + rightByte := byte((c[27]&0x07)<<5 + (c[28]&0xf8)>>3) + return binary.BigEndian.Uint16([]byte{leftByte, rightByte}) +} + +func (c consentMetadata) PurposeAllowed(id consentconstants.Purpose) bool { + // Purposes are stored in bits 152 - 175. The interface contract only defines behavior for ints in the range [1, 24]... + // so in the valid range, this won't even overflow a uint8. + if id > 24 { + id = 24 + } + return isSet(c, uint(id)+151) +} + +// Returns true if the bitIndex'th bit in data is a 1, and false if it's a 0. 
+func isSet(data []byte, bitIndex uint) bool { + byteIndex := bitIndex / 8 + bitOffset := bitIndex % 8 + return byteToBool(data[byteIndex] & (0x80 >> bitOffset)) +} diff --git a/vendorconsent/tcf2/metadata_test.go b/vendorconsent/tcf2/metadata_test.go new file mode 100644 index 0000000..e9c87a4 --- /dev/null +++ b/vendorconsent/tcf2/metadata_test.go @@ -0,0 +1,60 @@ +package vendorconsent + +import ( + "testing" + "time" +) + +func TestCreatedDate(t *testing.T) { + consent, err := Parse(decode(t, "COvcSpYOvcSpYC9AAAENAPCAAAAAAAAAAAAACvwDQABAAIAAYABIAC4AJQAagA9ACEAPgAjIBJoCvAK-AAAAAA")) + assertNilError(t, err) + created := consent.Created().UTC() + year, month, day := created.Date() + assertIntsEqual(t, 2020, year) + assertIntsEqual(t, int(time.February), int(month)) + assertIntsEqual(t, 27, day) + assertIntsEqual(t, 19, created.Hour()) + assertIntsEqual(t, 51, created.Minute()) + assertIntsEqual(t, 49, created.Second()) +} + +func TestLastUpdate(t *testing.T) { + consent, err := Parse(decode(t, "COvcSpYOvcSpYC9AAAENAPCAAAAAAAAAAAAACvwDQABAAIAAYABIAC4AJQAagA9ACEAPgAjIBJoCvAK-AAAAAA")) + assertNilError(t, err) + updated := consent.LastUpdated().UTC() + year, month, day := updated.Date() + assertIntsEqual(t, 2020, year) + assertIntsEqual(t, int(time.February), int(month)) + assertIntsEqual(t, 27, day) + assertIntsEqual(t, 19, updated.Hour()) + assertIntsEqual(t, 51, updated.Minute()) + assertIntsEqual(t, 49, updated.Second()) +} + +func TestLargeCmpID(t *testing.T) { + consent, err := Parse(decode(t, "COv_46cOv_46cFZFZTENAPCAAAAAAAAAAAAAE5QBwABAAXABVAH8AgAElgJkATkAYEAgAAQACAAGAAXABUAH8AQIAwAAAA")) + assertNilError(t, err) + assertUInt16sEqual(t, 345, consent.CmpID()) +} + +func TestLargeCmpVersion(t *testing.T) { + consent, err := Parse(decode(t, "COv_46cOv_46cFZFZTENAPCAAAAAAAAAAAAAE5QBwABAAXABVAH8AgAElgJkATkAYEAgAAQACAAGAAXABUAH8AQIAwAAAA")) + assertNilError(t, err) + assertUInt16sEqual(t, 345, consent.CmpVersion()) +} + +func TestLargeConsentScreen(t *testing.T) { 
+ consent, err := Parse(decode(t, "COv_46cOv_46cFZFZTENAPCAAAAAAAAAAAAAE5QBwABAAXABVAH8AgAElgJkATkAYEAgAAQACAAGAAXABUAH8AQIAwAAAA")) + assertNilError(t, err) + assertUInt8sEqual(t, 19, consent.ConsentScreen()) +} + +func TestLanguageExtremes(t *testing.T) { + consent, err := Parse(decode(t, "COv_46cOv_46cFZFZTBGAPCAAAAAAAAAAAAAE5QBwABAAXABVAH8AgAElgJkATkAYEAgAAQACAAGAAXABUAH8AQIAwAAAA")) + assertNilError(t, err) + assertStringsEqual(t, "BG", consent.ConsentLanguage()) + + consent, err = Parse(decode(t, "COv_46cOv_46cFZFZTSVAPCAAAAAAAAAAAAAE5QBwABAAXABVAH8AgAElgJkATkAYEAgAAQACAAGAAXABUAH8AQIAwAAAA")) + assertNilError(t, err) + assertStringsEqual(t, "SV", consent.ConsentLanguage()) +} diff --git a/vendorconsent/tcf2/rangesection.go b/vendorconsent/tcf2/rangesection.go new file mode 100644 index 0000000..3cd2323 --- /dev/null +++ b/vendorconsent/tcf2/rangesection.go @@ -0,0 +1,156 @@ +package vendorconsent + +import ( + "encoding/binary" + "fmt" +) + +func parseRangeSection(data consentMetadata) (*rangeSection, error) { + + // This makes an int from bits 230-241 + if len(data) < 31 { + return nil, fmt.Errorf("vendor consent strings using RangeSections require at least 31 bytes. Got %d", len(data)) + } + numEntries := parseNumEntries(data) + + // Parse out the "exceptions" here. 
+ currentOffset := uint(242) + consents := make([]rangeConsent, numEntries) + for i := uint16(0); i < numEntries; i++ { + thisConsent, bitsConsumed, err := parseRangeConsent(data, currentOffset) + if err != nil { + return nil, err + } + consents[i] = thisConsent + currentOffset = currentOffset + bitsConsumed + } + + return &rangeSection{ + consentMetadata: data, + consents: consents, + }, nil +} + +// parse the value of NumEntries, assuming this consent string uses a RangeEntry +func parseNumEntries(data []byte) uint16 { + // This should isolate the bits [000000xx, xxxxxxxx, xx000000] to get bits 230-241 as an int + leftByte := ((data[28] & 0x03) << 2) | (data[29] >> 6) + rightByte := (data[29] << 2) | (data[30] >> 6) + + return binary.BigEndian.Uint16([]byte{leftByte, rightByte}) +} + +// RangeSection Exception implemnetations + +// parseRangeConsents parses a RangeSection starting from the initial bit. +// It returns the exception, as well as the number of bits consumed by the parsing. +func parseRangeConsent(data consentMetadata, initialBit uint) (rangeConsent, uint, error) { + // Fixes #10 + if uint(len(data)) <= initialBit/8 { + return nil, 0, fmt.Errorf("bit %d was supposed to start a new RangeEntry, but the consent string was only %d bytes long", initialBit, len(data)) + } + // If the first bit is set, it's a Range of IDs + if isSet(data, initialBit) { + start, err := parseUInt16(data, initialBit+1) + if err != nil { + return nil, 0, err + } + end, err := parseUInt16(data, initialBit+17) + if err != nil { + return nil, 0, err + } + if start == 0 { + return nil, 0, fmt.Errorf("bit %d range entry exclusion starts at 0, but the min vendor ID is 1", initialBit) + } + if end > data.MaxVendorID() { + return nil, 0, fmt.Errorf("bit %d range entry exclusion ends at %d, but the max vendor ID is %d", initialBit, end, data.MaxVendorID()) + } + if end <= start { + return nil, 0, fmt.Errorf("bit %d range entry excludes vendors [%d, %d]. 
The start should be less than the end", initialBit, start, end) + } + return rangeVendorConsent{ + startID: start, + endID: end, + }, uint(33), nil + } + + vendorID, err := parseUInt16(data, initialBit+1) + if err != nil { + return nil, 0, err + } + if vendorID == 0 || vendorID > data.MaxVendorID() { + return nil, 0, fmt.Errorf("bit %d range entry excludes vendor %d, but only vendors [1, %d] are valid", initialBit, vendorID, data.MaxVendorID()) + } + + return singleVendorConsent(vendorID), 17, nil +} + +// parseUInt16 parses a 16-bit integer from the data array, starting at the given index +func parseUInt16(data []byte, bitStartIndex uint) (uint16, error) { + startByte := bitStartIndex / 8 + bitStartOffset := bitStartIndex % 8 + if bitStartOffset == 0 { + if uint(len(data)) < (startByte + 2) { + return 0, fmt.Errorf("rangeSection expected a 16-bit vendorID to start at bit %d, but the consent string was only %d bytes long", bitStartIndex, len(data)) + } + return binary.BigEndian.Uint16(data[startByte : startByte+2]), nil + } + if uint(len(data)) < (startByte + 3) { + return 0, fmt.Errorf("rangeSection expected a 16-bit vendorID to start at bit %d, but the consent string was only %d bytes long", bitStartIndex, len(data)) + } + + shiftComplement := 8 - bitStartOffset + + // Take the rightmost bits of the left byte, and the leftmost bits of the middle byte + leftByte := (data[startByte] & (0xff >> bitStartOffset)) << bitStartOffset + leftByte = leftByte | (data[startByte+1] >> shiftComplement) + + // Take the rightmost bits of the middle byte, and the leftmost bits of the right byte + rightByte := data[startByte+2] & (0xff << shiftComplement) + rightByte = (rightByte >> shiftComplement) | (data[startByte+1] << bitStartOffset) + + return binary.BigEndian.Uint16([]byte{leftByte, rightByte}), nil +} + +// A RangeConsents encodes consents that have been registered. 
+type rangeSection struct { + consentMetadata + consents []rangeConsent +} + +// VendorConsents implementation +func (p rangeSection) VendorConsent(id uint16) bool { + if id < 1 || id > p.MaxVendorID() { + return false + } + + for i := 0; i < len(p.consents); i++ { + if p.consents[i].Contains(id) { + return true + } + } + return false +} + +// A RangeSection has a default consent value and a list of "exceptions". This represents an "exception" blob +type rangeConsent interface { + Contains(id uint16) bool +} + +// This is a RangeSection exception for a single vendor. +type singleVendorConsent uint16 + +func (e singleVendorConsent) Contains(id uint16) bool { + return uint16(e) == id +} + +// This is a RangeSection exception for a range of IDs. +// The start and end bounds here are inclusive. +type rangeVendorConsent struct { + startID uint16 + endID uint16 +} + +func (e rangeVendorConsent) Contains(id uint16) bool { + return e.startID <= id && e.endID >= id +} diff --git a/vendorconsent/tcf2/rangesection_test.go b/vendorconsent/tcf2/rangesection_test.go new file mode 100644 index 0000000..a231429 --- /dev/null +++ b/vendorconsent/tcf2/rangesection_test.go @@ -0,0 +1,39 @@ +package vendorconsent + +import ( + "testing" +) + +func TestRangeSectionConsent(t *testing.T) { + // String built using http://iabtcf.com/#/encode + // This sample encodes a mix of Single- and Range-typed consent exceptions. + consent, err := Parse(decode(t, "COv_46cOv_46cADACHENAPCAAAAAAAAAAAAAE5QBwABAAXABVAH8AgAElgJkATkAYEAgAAQACAAGAAXABUAH8AQIAwAAAA")) + assertNilError(t, err) + assertUInt8sEqual(t, 2, consent.Version()) + assertUInt16sEqual(t, 3, consent.CmpID()) + assertUInt16sEqual(t, 2, consent.CmpVersion()) + assertUInt8sEqual(t, 7, consent.ConsentScreen()) + assertStringsEqual(t, "EN", consent.ConsentLanguage()) + assertUInt16sEqual(t, 15, consent.VendorListVersion()) + assertUInt16sEqual(t, 626, consent.MaxVendorID()) + + // The above encoder doesn't support setting purposes. 
+ // purposesWithConsent := buildMap(1, 3, 5, 6, 7, 10) + // for i := uint8(1); i <= 24; i++ { + // _, ok := purposesWithConsent[uint(i)] + // assertBoolsEqual(t, ok, consent.PurposeAllowed(consentconstants.Purpose(i))) + // } + + vendorsWithConsent := buildMap(2, 23, 42, 127, 128, 587, 612, 626) + for i := uint16(1); i <= consent.MaxVendorID(); i++ { + _, ok := vendorsWithConsent[uint(i)] + assertBoolsEqual(t, ok, consent.VendorConsent(i)) + } +} + +// Prevents #10 +func TestInvalidRangeEdgeCase(t *testing.T) { + data := decode(t, "COwDzqZOwDzqZN4ABMENAPCAAP4AAP-AAAhoAFQAYABgAOABQAAAAA") + data = data[:31] + assertInvalidBytes(t, data[:31], "rangeSection expected a 16-bit vendorID to start at bit 243, but the consent string was only 31 bytes long") +} diff --git a/vendorconsent/tcf2/test_utils.go b/vendorconsent/tcf2/test_utils.go new file mode 100644 index 0000000..2466e6f --- /dev/null +++ b/vendorconsent/tcf2/test_utils.go @@ -0,0 +1,78 @@ +package vendorconsent + +import ( + "encoding/base64" + "testing" +) + +func assertInvalid(t *testing.T, urlEncodedString string, expectError string) { + t.Helper() + data, err := base64.RawURLEncoding.DecodeString(urlEncodedString) + assertNilError(t, err) + assertInvalidBytes(t, data, expectError) +} + +func assertInvalidBytes(t *testing.T, data []byte, expectError string) { + t.Helper() + if consent, err := Parse(data); err == nil { + t.Errorf("base64 URL-encoded string %s was considered valid, but shouldn't be. MaxVendorID: %d. len(data): %d", base64.RawURLEncoding.EncodeToString(data), consent.MaxVendorID(), len(data)) + } else if err.Error() != expectError { + t.Errorf(`error messages did not match. 
Expected "%s", got "%s": %v`, expectError, err.Error(), err) + } +} + +func decode(t *testing.T, encodedString string) []byte { + data, err := base64.RawURLEncoding.DecodeString(encodedString) + assertNilError(t, err) + return data +} + +func assertNilError(t *testing.T, err error) { + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } +} + +func assertStringsEqual(t *testing.T, expected string, actual string) { + t.Helper() + if actual != expected { + t.Errorf("Strings were not equal. Expected %s, actual %s", expected, actual) + } +} + +func assertUInt8sEqual(t *testing.T, expected uint8, actual uint8) { + t.Helper() + if actual != expected { + t.Errorf("Ints were not equal. Expected %d, actual %d", expected, actual) + } +} + +func assertUInt16sEqual(t *testing.T, expected uint16, actual uint16) { + t.Helper() + if actual != expected { + t.Errorf("Ints were not equal. Expected %d, actual %d", expected, actual) + } +} + +func assertIntsEqual(t *testing.T, expected int, actual int) { + t.Helper() + if actual != expected { + t.Errorf("Ints were not equal. Expected %d, actual %d", expected, actual) + } +} + +func assertBoolsEqual(t *testing.T, expected bool, actual bool) { + t.Helper() + if actual != expected { + t.Errorf("Bools were not equal. Expected %t, actual %t", expected, actual) + } +} + +func buildMap(keys ...uint) map[uint]struct{} { + var s struct{} + m := make(map[uint]struct{}, len(keys)) + for _, key := range keys { + m[key] = s + } + return m +} diff --git a/vendorlist/eager-parsing.go b/vendorlist/eager-parsing.go index 086f6ea..0dd6f74 100644 --- a/vendorlist/eager-parsing.go +++ b/vendorlist/eager-parsing.go @@ -4,6 +4,7 @@ import ( "encoding/json" "errors" + "github.com/prebid/go-gdpr/api" "github.com/prebid/go-gdpr/consentconstants" ) @@ -15,7 +16,7 @@ import ( // 2. You need strong input validation and good error messages. // // Otherwise, you may get better performance with ParseLazily. 
-func ParseEagerly(data []byte) (VendorList, error) { +func ParseEagerly(data []byte) (api.VendorList, error) { var contract vendorListContract if err := json.Unmarshal(data, &contract); err != nil { return nil, err @@ -68,7 +69,7 @@ func (l parsedVendorList) Version() uint16 { return l.version } -func (l parsedVendorList) Vendor(vendorID uint16) Vendor { +func (l parsedVendorList) Vendor(vendorID uint16) api.Vendor { vendor, ok := l.vendors[vendorID] if ok { return vendor diff --git a/vendorlist/eager-parsing_test.go b/vendorlist/eager-parsing_test.go index 102bf85..96aaf19 100644 --- a/vendorlist/eager-parsing_test.go +++ b/vendorlist/eager-parsing_test.go @@ -1,9 +1,13 @@ package vendorlist -import "testing" +import ( + "testing" + + "github.com/prebid/go-gdpr/api" +) func TestEagerlyParsedVendorList(t *testing.T) { - AssertVendorlistCorrectness(t, func(data []byte) VendorList { + AssertVendorlistCorrectness(t, func(data []byte) api.VendorList { vendorList, err := ParseEagerly(data) if err != nil { t.Errorf("ParseEagerly returned an unexpected error: %v", err) diff --git a/vendorlist/lazy-parsing.go b/vendorlist/lazy-parsing.go index 98d2738..185e784 100644 --- a/vendorlist/lazy-parsing.go +++ b/vendorlist/lazy-parsing.go @@ -4,6 +4,7 @@ import ( "strconv" "github.com/buger/jsonparser" + "github.com/prebid/go-gdpr/api" "github.com/prebid/go-gdpr/consentconstants" ) @@ -15,7 +16,7 @@ import ( // 2. You don't need good errors on malformed input // // Otherwise, you may get better performance with ParseEagerly. 
-func ParseLazily(data []byte) VendorList { +func ParseLazily(data []byte) api.VendorList { return lazyVendorList(data) } @@ -28,7 +29,7 @@ func (l lazyVendorList) Version() uint16 { return 0 } -func (l lazyVendorList) Vendor(vendorID uint16) Vendor { +func (l lazyVendorList) Vendor(vendorID uint16) api.Vendor { var vendorBytes []byte jsonparser.ArrayEach(l, func(value []byte, dataType jsonparser.ValueType, offset int, err error) { if val, ok := lazyParseInt(value, "id"); ok { diff --git a/vendorlist/shared_test.go b/vendorlist/shared_test.go index cfb34e8..55795c7 100644 --- a/vendorlist/shared_test.go +++ b/vendorlist/shared_test.go @@ -2,14 +2,16 @@ package vendorlist import ( "testing" + + "github.com/prebid/go-gdpr/api" ) -func AssertVendorlistCorrectness(t *testing.T, parser func(data []byte) VendorList) { +func AssertVendorlistCorrectness(t *testing.T, parser func(data []byte) api.VendorList) { t.Run("TestVendorList", vendorListTester(parser)) t.Run("TestVendor", vendorTester(parser)) } -func vendorListTester(parser func(data []byte) VendorList) func(*testing.T) { +func vendorListTester(parser func(data []byte) api.VendorList) func(*testing.T) { return func(t *testing.T) { list := parser([]byte(testData)) assertIntsEqual(t, 5, int(list.Version())) @@ -18,7 +20,7 @@ func vendorListTester(parser func(data []byte) VendorList) func(*testing.T) { } } -func vendorTester(parser func(data []byte) VendorList) func(*testing.T) { +func vendorTester(parser func(data []byte) api.VendorList) func(*testing.T) { return func(t *testing.T) { list := parser([]byte(testData)) v := list.Vendor(32) @@ -68,7 +70,7 @@ func assertBoolsEqual(t *testing.T, expected bool, actual bool) { } } -func assertNil(t *testing.T, value Vendor, expectNil bool) { +func assertNil(t *testing.T, value api.Vendor, expectNil bool) { t.Helper() if expectNil && value != nil { t.Error("The vendor should be nil, but wasn't.") diff --git a/vendorlist/vendorlist.go b/vendorlist/vendorlist.go index 
c4b7b7a..00e7674 100644 --- a/vendorlist/vendorlist.go +++ b/vendorlist/vendorlist.go @@ -1,31 +1,15 @@ package vendorlist -import "github.com/prebid/go-gdpr/consentconstants" +import ( + "github.com/prebid/go-gdpr/api" +) -// VendorList is an interface used to fetch information about an IAB Global Vendor list. -// For the latest version, see: https://vendorlist.consensu.org/vendorlist.json -type VendorList interface { - // Version returns the version of the vendor list which this is. - // - // If the input was malformed, this will return 0. - Version() uint16 +// Copying from API for backwards compatibility - // Vendor returns info about the vendor with the given ID. - // This returns nil if that vendor isn't in this list, or the input was malformed somehow. - // - // If callers need to query multiple Purpose or LegitimateInterest statuses from the same vendor, - // they should call this function once and then reuse the object it returns for future queries. - Vendor(vendorID uint16) Vendor +type VendorList interface { + api.VendorList } -// Vendor describes which purposes a given vendor claims to use data for, in this vendor list. type Vendor interface { - // Purpose returns true if this vendor claims to use data for the given purpose, or false otherwise - Purpose(purposeID consentconstants.Purpose) bool - - // LegitimateInterest retursn true if this vendor claims a "Legitimate Interest" to - // use data for the given purpose. 
- // - // For an explanation of legitimate interest, see https://www.gdpreu.org/the-regulation/key-concepts/legitimate-interest/ - LegitimateInterest(purposeID consentconstants.Purpose) bool + api.Vendor } diff --git a/vendorlist2/eager-parsing.go b/vendorlist2/eager-parsing.go new file mode 100644 index 0000000..2807973 --- /dev/null +++ b/vendorlist2/eager-parsing.go @@ -0,0 +1,116 @@ +package vendorlist2 + +import ( + "encoding/json" + "errors" + + "github.com/prebid/go-gdpr/api" + "github.com/prebid/go-gdpr/consentconstants" +) + +// ParseEagerly interprets and validates the Vendor List data up front, before returning it. +// The returned object can be shared safely between goroutines. +// +// This is ideal if: +// 1. You plan to call functions on the returned VendorList many times before discarding it. +// 2. You need strong input validation and good error messages. +// +// Otherwise, you may get better performance with ParseLazily. +func ParseEagerly(data []byte) (api.VendorList, error) { + var contract vendorListContract + if err := json.Unmarshal(data, &contract); err != nil { + return nil, err + } + + if contract.Version == 0 { + return nil, errors.New("data.vendorListVersion was 0 or undefined. 
Versions should start at 1") + } + if len(contract.Vendors) == 0 { + return nil, errors.New("data.vendors was undefined or had no elements") + } + + parsedList := parsedVendorList{ + version: contract.Version, + vendors: make(map[uint16]parsedVendor, len(contract.Vendors)), + } + + for _, v := range contract.Vendors { + parsedList.vendors[v.ID] = parseVendor(v) + } + + return parsedList, nil +} + +func parseVendor(contract vendorListVendorContract) parsedVendor { + parsed := parsedVendor{ + purposes: mapify(contract.Purposes), + legitimateInterests: mapify(contract.LegitimateInterests), + flexiblePurposes: mapify(contract.FlexiblePurposes), + } + + return parsed +} + +func mapify(input []uint8) map[consentconstants.Purpose]struct{} { + m := make(map[consentconstants.Purpose]struct{}, len(input)) + var s struct{} + for _, value := range input { + m[consentconstants.Purpose(value)] = s + } + return m +} + +type parsedVendorList struct { + version uint16 + vendors map[uint16]parsedVendor +} + +func (l parsedVendorList) Version() uint16 { + return l.version +} + +func (l parsedVendorList) Vendor(vendorID uint16) api.Vendor { + vendor, ok := l.vendors[vendorID] + if ok { + return vendor + } + return nil +} + +type parsedVendor struct { + purposes map[consentconstants.Purpose]struct{} + legitimateInterests map[consentconstants.Purpose]struct{} + flexiblePurposes map[consentconstants.Purpose]struct{} +} + +func (l parsedVendor) Purpose(purposeID consentconstants.Purpose) (hasPurpose bool) { + _, hasPurpose = l.purposes[purposeID] + if !hasPurpose { + _, hasPurpose = l.flexiblePurposes[purposeID] + } + return +} + +// LegitimateInterest retursn true if this vendor claims a "Legitimate Interest" to +// use data for the given purpose. 
+// +// For an explanation of legitimate interest, see https://www.gdpreu.org/the-regulation/key-concepts/legitimate-interest/ +func (l parsedVendor) LegitimateInterest(purposeID consentconstants.Purpose) (hasLegitimateInterest bool) { + _, hasLegitimateInterest = l.legitimateInterests[purposeID] + if !hasLegitimateInterest { + _, hasLegitimateInterest = l.flexiblePurposes[purposeID] + } + return +} + +type vendorListContract struct { + Version uint16 `json:"vendorListVersion"` + Vendors map[string]vendorListVendorContract `json:"vendors"` +} + +type vendorListVendorContract struct { + ID uint16 `json:"id"` + Purposes []uint8 `json:"purposes"` + LegitimateInterests []uint8 `json:"legIntPurposes"` + FlexiblePurposes []uint8 `json:"flexiblePurposes"` +} diff --git a/vendorlist2/eager-parsing_test.go b/vendorlist2/eager-parsing_test.go new file mode 100644 index 0000000..7a4085e --- /dev/null +++ b/vendorlist2/eager-parsing_test.go @@ -0,0 +1,17 @@ +package vendorlist2 + +import ( + "testing" + + "github.com/prebid/go-gdpr/api" +) + +func TestEagerlyParsedVendorList(t *testing.T) { + AssertVendorlistCorrectness(t, func(data []byte) api.VendorList { + vendorList, err := ParseEagerly(data) + if err != nil { + t.Errorf("ParseEagerly returned an unexpected error: %v", err) + } + return vendorList + }) +} diff --git a/vendorlist2/lazy-parsing.go b/vendorlist2/lazy-parsing.go new file mode 100644 index 0000000..a29821f --- /dev/null +++ b/vendorlist2/lazy-parsing.go @@ -0,0 +1,84 @@ +package vendorlist2 + +import ( + "strconv" + + "github.com/buger/jsonparser" + "github.com/prebid/go-gdpr/api" + "github.com/prebid/go-gdpr/consentconstants" +) + +// ParseLazily returns a view of the data which re-calculates things on each function call. +// The returned object can be shared safely between goroutines. +// +// This is ideal if: +// 1. You only need to look up a few vendors or purpose IDs +// 2. 
You don't need good errors on malformed input +// +// Otherwise, you may get better performance with ParseEagerly. +func ParseLazily(data []byte) api.VendorList { + return lazyVendorList(data) +} + +type lazyVendorList []byte + +func (l lazyVendorList) Version() uint16 { + if val, ok := lazyParseInt(l, "vendorListVersion"); ok { + return uint16(val) + } + return 0 +} + +func (l lazyVendorList) Vendor(vendorID uint16) api.Vendor { + vendorBytes, _, _, err := jsonparser.Get(l, "vendors", strconv.Itoa(int(vendorID))) + if err == nil && len(vendorBytes) > 0 { + return lazyVendor(vendorBytes) + } + return nil +} + +type lazyVendor []byte + +func (l lazyVendor) Purpose(purposeID consentconstants.Purpose) bool { + exists := idExists(l, int(purposeID), "purposes") + if exists { + return true + } + return idExists(l, int(purposeID), "flexiblePurposes") +} + +func (l lazyVendor) LegitimateInterest(purposeID consentconstants.Purpose) bool { + exists := idExists(l, int(purposeID), "legIntPurposes") + if exists { + return true + } + return idExists(l, int(purposeID), "flexiblePurposes") +} + +// Returns false unless "id" exists in an array located at "data.key". 
+func idExists(data []byte, id int, key string) bool { + hasID := false + + jsonparser.ArrayEach(data, func(value []byte, dataType jsonparser.ValueType, offset int, err error) { + if err == nil && dataType == jsonparser.Number { + if intVal, err := strconv.ParseInt(string(value), 10, 0); err == nil { + if int(intVal) == id { + hasID = true + } + } + } + }, key) + + return hasID +} + +func lazyParseInt(data []byte, key string) (int, bool) { + if value, dataType, _, err := jsonparser.Get(data, key); err == nil && dataType == jsonparser.Number { + intVal, err := strconv.Atoi(string(value)) + if err != nil { + return 0, false + } + return intVal, true + } + return 0, false +} diff --git a/vendorlist2/lazy-parsing_test.go b/vendorlist2/lazy-parsing_test.go new file mode 100644 index 0000000..7b959ef --- /dev/null +++ b/vendorlist2/lazy-parsing_test.go @@ -0,0 +1,9 @@ +package vendorlist2 + +import ( + "testing" +) + +func TestLazyParsedVendorList(t *testing.T) { + AssertVendorlistCorrectness(t, ParseLazily) +} diff --git a/vendorlist2/shared_test.go b/vendorlist2/shared_test.go new file mode 100644 index 0000000..04a99e8 --- /dev/null +++ b/vendorlist2/shared_test.go @@ -0,0 +1,108 @@ +package vendorlist2 + +import ( + "testing" + + "github.com/prebid/go-gdpr/api" +) + +func AssertVendorlistCorrectness(t *testing.T, parser func(data []byte) api.VendorList) { + t.Run("TestVendorList", vendorListTester(parser)) + t.Run("TestVendor", vendorTester(parser)) +} + +func vendorListTester(parser func(data []byte) api.VendorList) func(*testing.T) { + return func(t *testing.T) { + list := parser([]byte(testData)) + assertIntsEqual(t, 28, int(list.Version())) + assertNil(t, list.Vendor(2), true) + assertNil(t, list.Vendor(8), false) + } +} + +func vendorTester(parser func(data []byte) api.VendorList) func(*testing.T) { + return func(t *testing.T) { + list := parser([]byte(testData)) + v := list.Vendor(8) + assertBoolsEqual(t, true, v.Purpose(1)) + assertBoolsEqual(t, true, 
v.Purpose(2)) + assertBoolsEqual(t, true, v.Purpose(3)) + assertBoolsEqual(t, true, v.Purpose(4)) + assertBoolsEqual(t, false, v.Purpose(5)) + assertBoolsEqual(t, false, v.Purpose(6)) + + assertBoolsEqual(t, false, v.LegitimateInterest(1)) + assertBoolsEqual(t, true, v.LegitimateInterest(2)) + assertBoolsEqual(t, false, v.LegitimateInterest(3)) + + v = list.Vendor(80) + assertBoolsEqual(t, true, v.Purpose(1)) + assertBoolsEqual(t, true, v.Purpose(2)) + assertBoolsEqual(t, false, v.Purpose(3)) + assertBoolsEqual(t, true, v.Purpose(4)) + assertBoolsEqual(t, false, v.Purpose(5)) + assertBoolsEqual(t, false, v.Purpose(6)) + + assertBoolsEqual(t, false, v.LegitimateInterest(1)) + assertBoolsEqual(t, true, v.LegitimateInterest(2)) + assertBoolsEqual(t, false, v.LegitimateInterest(3)) + } + +} + +const testData = ` +{ + "gvlSpecificationVersion": 2, + "vendorListVersion": 28, + "tcfPolicyVersion": 2, + "lastUpdated": "2020-03-05T16:05:29Z", + "vendors": { + "8": { + "id": 8, + "name": "Emerse Sverige AB", + "purposes": [1, 3, 4], + "legIntPurposes": [2, 7, 8, 9], + "flexiblePurposes": [2, 9], + "specialPurposes": [1, 2], + "features": [1, 2], + "specialFeatures": [], + "policyUrl": "https://www.emerse.com/privacy-policy/" + }, + "80": { + "id": 80, + "name": "Sharethrough, Inc", + "purposes": [1, 2, 4, 7, 9, 10], + "legIntPurposes": [], + "flexiblePurposes": [2, 4, 7, 9, 10], + "specialPurposes": [], + "features": [], + "specialFeatures": [], + "policyUrl": "https://platform-cdn.sharethrough.com/privacy-policy" + } + } +} +` + +func assertIntsEqual(t *testing.T, expected int, actual int) { + t.Helper() + if actual != expected { + t.Errorf("Ints were not equal. Expected %d, actual %d", expected, actual) + } +} + +func assertBoolsEqual(t *testing.T, expected bool, actual bool) { + t.Helper() + if actual != expected { + t.Errorf("Bools were not equal. 
Expected %t, actual %t", expected, actual) + } +} + +func assertNil(t *testing.T, value api.Vendor, expectNil bool) { + t.Helper() + if expectNil && value != nil { + t.Error("The vendor should be nil, but wasn't.") + } + if !expectNil && value == nil { + t.Errorf("The vendor should not be nil, but was.") + } +}